/* This do-file cleans the raw data and generates the datasets used for regression. */

* ------------------------------------------------------------------------------
* Session setup: reset Stata, install the user-written packages used by this
* do-file, define the project path globals, and open the log file.
* ------------------------------------------------------------------------------
capture log close
clear all
cap clear matrix
cap clear mata
set matsize 10000
set maxvar 30000
set more off, permanently

* SSC packages installed ahead of -reghdfe, compile- (same order as before;
* each package is now installed exactly once)
foreach pkg in egenmore mdesc reghdfe ftools {
	ssc install `pkg', replace
}
reghdfe, compile

* Remaining SSC packages
foreach pkg in tuples xtivreg2 ivreg2 ranktest {
	ssc install `pkg', replace
}

* TAXSIM interface, installed from the NBER site
net from "http://www.nber.org/stata"
net install taxsim9, replace

* Project folders, all defined relative to the directory Stata is in when the do-file starts
global wkdir `c(pwd)'
global rawdata "${wkdir}/Data/Raw"
global regdata "${wkdir}/Data/RegData"

log using clean_data.log, replace


********************************************************************************
*****************************   CPS DATASETS   *********************************
********************************************************************************

			**********************************************
*******************		INPUT AND CLEAN DATA		******************* 
			**********************************************
// March CPS data downloaded from IPUMS
// See IPUMS CPS for variable definitions
// Load the IPUMS CPS extract
use "${rawdata}/CPS/ipums_cps.dta"

*** Household and family identifiers
egen hhid = group(serial year month)
egen sfamid = group(hhid famunit)

*** Number of children (age below 18) in each family, top-coded at 4
gen child = (age < 18)
egen kids = total(child), by(sfamid)
replace kids = 4 if kids > 4

*** Labor force status indicators built from empstat
gen employed   = inlist(empstat, 10, 12)
gen unemployed = inrange(empstat, 20, 22)
gen nilf       = inrange(empstat, 30, 40)

* empstat codes 0 and 1 carry no labor force status: set all three indicators to missing
foreach v of varlist unemployed employed nilf {
	replace `v' = . if inlist(empstat, 0, 1)
}

* Employed/unemployed are only defined inside the labor force
foreach v of varlist unemployed employed {
	replace `v' = . if nilf == 1
}


************************
*** Collapsing data 
************************
*** Population totals: sum of wtfinl over all ages, by kids-state-year cell
preserve
collapse (rawsum) wtfinl , by(kids statefip year)
rename wtfinl weight_pop
tempfile pop
save `pop', replace
restore

*** Employment and unemployment rates for persons aged 15 and over
keep if age>=15
collapse (mean) employed unemployed (rawsum) wtfinl [pw=wtfinl], by(kids statefip year)
rename wtfinl weight_ur

merge 1:1 kids statefip year using `pop'
drop _merge

rename unemployed urate_ip

* Lags, leads and proportional changes are taken within each state-kids cell.
* (year) inside -bysort- makes the within-cell ordering explicit, so [_n-k] and
* [_n+k] no longer depend on a sort issued earlier in the script.
forvalues k = 1/5 {
	bysort statefip kids (year): gen urate_ip_lag`k' = urate_ip[_n-`k']
}
forvalues k = 1/3 {
	bysort statefip kids (year): gen urate_ip_lead`k' = urate_ip[_n+`k']
}
* Proportional change over k years, defined only when the earlier row is exactly k years back
foreach k in 1 2 5 {
	bysort statefip kids (year): gen urate_ip_ch`k' = (urate_ip-urate_ip[_n-`k'])/urate_ip[_n-`k'] if year==year[_n-`k']+`k'
}

rename employed epop_ip
keep if year>=1993 & year<=2015
save "${rawdata}/StateYear/ipums_urate.dta", replace


			****************************************
*******************   INPUT AND CLEAN DATA   ******************* 
			****************************************
// March CPS data downloaded from NBER. 
// I downloaded all yearly files and appended them together, changing only variable names so that they are consistent since the 1980s.
// See CPS codebooks for variable definitions
cd "${rawdata}/CPS" 

*** Variables read from the appended March CPS file
local tokeep year h_seq f_pos statefip p_famrel f_type p_famtyp p_relhd p_marst p_marwt redesigned p_age p_sex ///
	p_esr p_lfr p_eit_cred p_inc_ws p_inc_se p_inc_uc p_inc_pa p_inc_sp p_inc_ss p_inc_vet f_mv_fs f_mv_sl 

!unzip "${rawdata}/CPS/marcps88b16.dta.zip"
use `tokeep' using "${rawdata}/CPS/marcps88b16.dta", clear
drop if statefip==0

*** 2014 was redesigned: weights are doubled in that year, so halve them
replace p_marwt = p_marwt/2 if year==2014

*** Household identifier and household size
gen one = 1
egen hhid = group(year h_seq)
bysort hhid: gen h_numpers = _N

*** Head indicator (bfhead), 1988 onwards; 0 for everyone else
gen bfhead = (((p_famrel==1 & !inlist(f_type, 1, 3)) | inlist(f_type, 2, 5)) ///
	| (p_relhd==1 & inlist(f_type, 1, 3))) & year>=1988

*** Small-family identifier and small-family head indicator (0 for everyone else)
egen sfamid = group(year h_seq f_pos)
gen sfhead = inlist(p_famtyp, 2, 5) | p_famrel==1

*** Spending variables refer to the calendar year before the survey; unemployment rates use the CPS year
rename year cpsyear
gen year = cpsyear - 1										// calendar year, used for spending
keep if inrange(cpsyear, 1990, 2016)		// main analysis is 1993-2015; earlier years are needed for lags

*** Attach the CPI by calendar year and keep matched observations only
sort year
merge m:1 year using "${rawdata}/StateYear/cpi_6717.dta", keep(match) nogenerate
		

*********************************	
*** Create unemployed and employed variables
*********************************	
* For CPS years up to 1993 the labor force recode is taken from p_esr
replace p_lfr = p_esr if cpsyear<=1993
gen employed   = inlist(p_lfr, 1, 2)
gen unemployed = inlist(p_lfr, 4, 3)
gen nilf       = inlist(p_lfr, 5, 6, 7)

* p_lfr equal to 0 carries no labor force status
foreach v of varlist unemployed employed nilf {
	replace `v' = . if p_lfr==0
}
* Employed/unemployed are only defined inside the labor force
foreach v of varlist unemployed employed {
	replace `v' = . if nilf==1
}

*********************************	
*** Fix expenditure data --> make them all at small family level
*********************************	
// Variable key from the original notes:  p_inc_uc: UI; h_inc_vet: UI/VET/WC; p_inc_pa: TANF/AFDC;
// p_eit_cred: EITC; h_mv_fs: Food stamps; p_inc_sp: SSI; p_inc_ss: SS

** Spread household-level values evenly over household members
* Take the f_ value reported on the bfhead record, total it within the household,
* and divide by the number of persons in the household
foreach item in mv_fs mv_sl {
	gen tmp`item' = cond(bfhead==1, f_`item', .)
	egen h_`item' = total(tmp`item'), by(hhid)
	drop tmp`item'
	gen p_`item' = h_`item'/h_numpers
}

************************
*** Collapsing data 
************************
*** Number of children (age below 18) in each family, top-coded at 4
gen child=p_age<18
egen kids = total(child), by(sfamid year statefip)
replace kids = 4 if kids>4

*** Unemployment rates by kids-state-CPS year (persons aged 15 and over)
preserve
keep if p_age>=15
collapse (mean) employed unemployed (rawsum) p_marwt [pw=p_marwt], by(kids statefip cpsyear)
rename cpsyear year
rename p_marwt weight_mar_ur
rename unemployed urate_mar

* Lags and changes are taken within each state-kids cell. (year) inside -bysort-
* fixes the within-cell ordering explicitly, so [_n-k] does not rely on an earlier sort.
forvalues k = 1/5 {
	bysort statefip kids (year): gen urate_mar_lag`k' = urate_mar[_n-`k']
}
* Level change over k years, defined only when the earlier row is exactly k years back
foreach k in 1 2 5 {
	bysort statefip kids (year): gen urate_mar_ch`k' = (urate_mar-urate_mar[_n-`k']) if year==year[_n-`k']+`k'
}

tempfile urate
save `urate'
restore

*** Program expenditures: weighted sums by kids-state-calendar year
collapse (sum) p_inc_uc p_inc_sp p_inc_pa p_inc_ss p_mv_fs (rawsum) p_marwt [pw=p_marwt], by(kids statefip year)
* The unemployment file is keyed on CPS year (renamed to year above), the spending data on calendar year
merge 1:1 kids statefip year using `urate'
drop _merge

rename p_marwt weight_mar_prog
rename p_mv_fs fs_spend
rename p_inc_uc ui_spend
rename p_inc_pa afdc_spend
rename p_inc_ss ss_spend
rename p_inc_sp ssi_spend

keep if year>=1993 & year<=2015
save "${rawdata}/StateYear/cpsdata.dta", replace


********************************************************************************
*****************************   SIPP DATASETS   ********************************
********************************************************************************

			******************************************************
***************** 		EXTRACT/CLEAN SIPP DATASETS 		***********************
			******************************************************

****************************
*** 1990-2008 Surveys
****************************
// Data downloaded from NBER
// SIPP extraction. For every panel listed below and every wave of that panel:
//   1. decompress the raw .dat file,
//   2. run the wave-specific do-file found in the panel folder (presumably the
//      dictionary/read-in program distributed with the data -- TODO confirm),
//   3. keep the variables of interest and rename them to one common set of names
//      (the 1996+ panels and the 1993 panel use different raw names),
//   4. recode yes/no items to 0/1, save the wave to a tempfile, delete the .dat file.
// The waves of a panel are then appended and stored in one tempfile per panel.
cd "${rawdata}/SIPP"
!unzip SIPP.zip

foreach syear in 1993 1996 2001 2004 2008 {

	cd "${rawdata}/SIPP/Data/Raw/SIPP/`syear'"

	** Extract up to last health wave available in each panel       
	if `syear'==1993 | `syear'==2001 local maxw 9
	if `syear'==1996 | `syear'==2004 local maxw 12
	if `syear'==2008 local maxw 16

	** Clean each wave first
	forvalues i=1(1)`maxw' {
		di ""
		di "THIS LOOP IS FOR YEAR `syear' AND WAVE `i'"
		di ""

		// The 1993 files are .zip archives, later panels are gzip files
		if `syear'==1993 {
			!unzip s`syear'w`i'.dat.zip
		}
		else {
			!gunzip s`syear'w`i'.dat.gz
		}
		clear
		// Run the wave-specific do-file; it is expected to leave the wave data in memory
		qui do s`syear'w`i'.do
	 
		*** Keep variables of interest
		if `syear'>=1996 keep ssuseq ssuid eentaid shhadid spanel swave srotaton srefmon rhcalmn rhcalyr tfipsst ///
				ehhnumpp rhtype whfnwgt thtotinc thearn tmthrnt rfid efnp tftotinc tfearn rsid esfnp tstotinc epppnum eppintvw ///
				wpfinwgt tage tbyear esex erace eorigin ems epnspous tptotinc tpearn rmesr rwksperm rwkesr1 rwkesr2 rwkesr3 ///
				rwkesr4 rwkesr5 rmwkwjb eeducate emax ejobcntr ersnowrk eawop eabre eptwrk eptresn elkwrk elayoff ///
				rtakjob rnotake emoonlit rmwksab rmwklkg rmhrswk eeno1 estlemp1 tsjdate1 tejdate1 ersend1 ejbhrs1 tpmsum1 ///
				tpyrate1 rpyper1 ejbind1 tjbocc1 ecntrc1 eeno2 estlemp2 tsjdate2 tejdate2 ersend2 ejbhrs2 tpmsum2 ///
				tpyrate2 rpyper2 ejbind2 tjbocc2 *01amta *01amtk thsocsec *03amta *03amtk *04amt thssi euectyp5 ///
				auectyp5 *05amt *27amt thfdstp *20amt thafdc emrtjnt emrtown tmiown rfnkids etenure ecrmth rmedcode ecdmth ///
				ehimth ehiowner ehemply ehicost ehirsn* epndad epnmom ebuscntr

		 else keep suseqnum suid entry panel addid wave month year rot refmth sustate hnp htype hwgt htotinc hearn ///
				phrent fid fnp ftotinc fearn sid snp stotinc pnum intvw p5wgt age brthyr sex race ethncty ms pnsp totinc earn ///
				esr weeks wesr1 wesr2 wesr3 wesr4 wesr5 wksjob wkswop wkslok reasab takjob takjobn cwork uhours wkspt ///
				wksptr empled rhcdis higrade grdcmpl ws1occ ws1ind ws1wks ws1amt ws12023 ws12024 ws12025 ws12028 ///
				ws12029 ws2occ ws2ind ws2wks ws2amt ws22123 ws22124 ws22125 ws22128 ws22129 s01amta is01a ///
				s01amtk is01k hsocsec s03amt is03 hssi r05 ir05 s05amt is05 s20amt is20 hafdc s27amt is27 hfdstp ///
				famrel nkidshl hitm36b carecov medcode caidcov hiind hiown hisrc hipay
		 
		*** Rename variables
		// 1996 and later panels: map the raw names to the common names used below
		if `syear'>=1996 {      

			// ssuid, spanel, swave lose their leading s
			foreach x in suid panel wave {
			rename s`x' `x'
			}
			rename eentaid entry
			rename ssuseq suseqnum                  
			rename srotaton rot     
			rename shhadid addid    
			rename srefmon refmth           
			rename rhcalmn mth              
			rename rhcalyr year             
			rename tfipsst statefip         
			rename ehhnumpp hnp             
			rename rhtype htype             
			rename whfnwgt hwgt             
			rename thtotinc hinc            
			rename thearn hearn             
			rename tmthrnt phrent           
			rename rfid famid               
			rename efnp fnp         
			rename tftotinc finc            
			rename tfearn fearn             
			rename rsid subfid              
			rename esfnp snp                
			rename tstotinc sinc
			rename epppnum pnum
			rename eppintvw intvw
			rename wpfinwgt p5wgt
			rename tage age
			// esex, erace, ems lose their leading e
			foreach x in sex race ms {
				rename e`x' `x'
			}
			rename tbyear brthyr
			rename eorigin ethncty
			rename epnspous pnsp
			rename tptotinc totinc
			rename tpearn earn
			rename rmesr esr
			rename rwksperm wks
			forvalues x=1(1)5 {
				rename rwkesr`x' wesr`x'
			}
			rename rmwkwjb wksjob
			rename eeducate higrade
			rename ejobcntr numjobs
			rename ebuscntr numbus
			rename ersnowrk whynotwork
			rename eawop fullweekout_yn
			rename eabre whyabsent
			rename eptwrk less35_yn
			rename eptresn whyless35
			rename elkwrk lookwork_yn
			rename elayoff layoff_yn
			rename rtakjob takjob_yn
			rename emoonlit moonlit_yn
			rename rmwksab wksabs
			rename rmwklkg wkslok
			rename rmhrswk uhrs_code
			rename ecntrc1 union_yn
			// Job-specific variables for job 1 and job 2
			foreach x in 1 2 {
				rename estlemp`x' stlemp`x'_yn
				rename tsjdate`x' stdate`x'
				rename tejdate`x' enddate`x'
				rename ersend`x' whystop`x'
				rename ejbhrs`x' uhours`x'
				rename tpmsum`x' ws`x'amt
				rename tpyrate`x' ws`x'hrwg
				rename rpyper`x' ws`x'freqpay
				rename ejbind`x' ws`x'ind
				rename tjbocc`x' ws`x'occ
			}

			*** Welfare/Insurance program participation and $$
			rename t01amta ss_amt                           // SS-Own
			rename a01amta ss_amt_flag

			rename t01amtk ss_ch_amt                        // SS-Child
			rename a01amtk ss_ch_amt_flag
			rename thsocsec h_ss_amt

			rename t03amta ssi_amt                          // SSI-Own
			rename a03amta ssi_amt_flag

			rename t03amtk ssi_ch_amt                       // SSI-Child
			rename a03amtk ssi_ch_amt_flag

			rename t04amt ssi_st_amt                        // SSI-State-Own
			rename a04amt ssi_st_amt_flag
			rename thssi h_ssi_amt

			rename euectyp5 ui_yn                           // UI
			rename auectyp5 ui_flag
			rename t05amt uiamt
			rename a05amt uiamt_flag

			rename t27amt fs_amt                            // Food Stamps
			rename a27amt fs_amt_flag
			rename thfdstp h_fs_amt

			rename t20amt tanf_amt                  // Public Assistance
			rename a20amt tanf_amt_flag
			rename thafdc h_tanf_amt

			rename emrtjnt mtg_yn
			rename emrtown ownmtg_yn
			rename tmiown intpd_mtg
			rename rfnkids nkidshl
			rename etenure hownstat
			rename ecrmth carecov
			rename rmedcode medcode
			rename ecdmth caidcov
			rename ehimth hiind
			rename ehiowner hiown 
			rename ehemply hisrc 
			rename ehicost hipay
		}       

		// 1993 panel: most names already match, only these need renaming
		else {
			rename month mth
			rename sustate statefip
			rename htotinc hinc
			rename fid famid
			rename ftotinc finc
			rename sid subfid
			rename stotinc sinc

			rename weeks wks
			rename cwork caswork_yn
			rename wkspt wksless35
			rename wksptr whyless35
			rename empled selfemp

			rename ws12023 w1stop
			rename ws22123 w2stop
			rename ws12025 job1hrs
			rename ws22125 job2hrs
			rename ws12028 ws1hrwg
			rename ws22128 ws2hrwg
			rename ws12029 ws1freqpay
			rename ws22129 ws2freqpay

			*** Welfare/Insurance program participation and $$
			rename s01amta ss_amt                              // SS-Own
			rename is01a ss_amt_flag

			rename s01amtk ss_ch_amt                           // SS-Child
			rename is01k ss_ch_amt_flag
			rename hsocsec h_ss_amt

			rename s03amt ssi_amt                               // SSI
			rename is03 ssi_amt_flag
			rename hssi h_ssi_amt

			rename r05 ui_yn                                    // UI
			rename ir05 ui_flag
			rename s05amt uiamt
			rename is05 uiamt_flag

			rename s20amt tanf_amt                              // Public Assistance
			rename is20 tanf_amt_flag
			rename hafdc h_tanf_amt 

			rename s27amt fs_amt                                 // Food Stamps
			rename is27 fs_amt_flag
			rename hfdstp h_fs_amt

			rename takjob takjob_yn
			rename w1stop w1stop_yn
			rename w2stop w2stop_yn
			rename ws12024 whystop1
			rename ws22124 whystop2
			rename hitm36b rsnleft

			// These variables are kept for the 1996+ panels only; create empty versions so all panels share them
			for any epndad epnmom whynotwork: gen X=.
		}
		 
		 * Fix merging variables
		 // Convert identifiers to numeric where they are strings (errors are suppressed by capture),
		 // then fold the entry number into pnum
		 for any addid suid pnum entry: capture qui destring X, replace
		 replace pnum = 1000*entry+pnum
		 
		 *** Recode binary vars
		 // sex is shifted down by one so that the labels below read 0 = M, 1 = F
		 replace sex=sex-1
		 label define sexlabel 0 M 1 F
		 label values sex sexlabel
		 
		// 1996+ panels: -1 becomes missing, then 1 maps to 1 and 2 maps to 0
		if `syear'>=1996 {
			for any fullweekout_yn less35_yn lookwork_yn layoff_yn takjob_yn moonlit_yn ///
				stlemp1_yn union_yn stlemp2_yn ui_yn mtg_yn ownmtg_yn: replace X=. if X==-1 \ replace X=-(X-2)

			label define yesno 0 no 1 yes
			for any fullweekout_yn less35_yn lookwork_yn layoff_yn takjob_yn moonlit_yn ///
				stlemp1_yn union_yn stlemp2_yn ui_yn mtg_yn ownmtg_yn: label values X yesno
		}       
		 
		// 1993 panel: 0 becomes missing, then 1 maps to 1 and 2 maps to 0
		else {
			for any takjob_yn caswork_yn w1stop_yn w2stop_yn ui_yn: replace X=. if X==0 \ replace X=-(X-2)

			label define yesno 0 no 1 yes
			for any takjob_yn caswork_yn w1stop_yn w2stop_yn ui_yn: label values X yesno
		}       
						 
		 // Store the cleaned wave and remove the decompressed raw file
		 tempfile temp`syear'w`i'
		 save `temp`syear'w`i'', replace

		 rm s`syear'w`i'.dat
	}

	*** Append waves together
	use `temp`syear'w1', clear
	forvalues i=2(1)`maxw' {
		 append using `temp`syear'w`i''
	}       

	// One tempfile per panel, read again by the sample-construction loop further down
	tempfile temp`syear'
	compress
	save `temp`syear'', replace
}


			******************************************************
***************** 	CREATE unempl SAMPLE AND VARIABLES  ***********************
			******************************************************

// Builds, for each 1996+ SIPP panel, one record per job loss with the pre-loss
// quarterly earnings history used as input to the UI calculators.
// Steps: drop persons with inconsistent birth years, build labor force flags,
// locate the month of involuntary job loss, apply the sample restrictions,
// number the quarters in sample, compute lagged quarterly earnings and
// base-period wages, then keep the job-loss month only and save one tempfile per panel.
// All variables are referenced by their full names, so the loop does not depend
// on variable abbreviation being switched on.
foreach syear in 1996 2001 2004 2008 {		
	di ""
	di "THIS LOOP IS FOR YEAR `syear'"
	di ""
		
	use `temp`syear'', clear
	for any panel year: replace X = X+1900 if X<1900

	*** Create sample of interest
	sort suid pnum sex wave year mth
	
	* First see to drop those that by mistake seem to be same individuals
	// A person is flagged when the birth year differs by more than one year between
	// two consecutive records; all records of a flagged person are dropped.
	// diff_age is created here but not used in the visible code.
	bysort suid pnum sex: gen diff_byear=brthyr-brthyr[_n+1]
	bysort suid pnum sex: gen diff_age=age-age[_n+1]
	gen mistake=1
	bysort suid pnum sex: replace mistake=0 if (diff_byear>=-1 & diff_byear<=1) | diff_byear==.
	bysort suid pnum sex: egen temp=max(mistake)
	replace mistake=temp
	drop if mistake==1
	drop temp
	
	* Create order variable
	sort suid pnum sex wave year mth
	bysort suid pnum sex: gen order=_n
	
	** Create work/unempl sample
	sort suid pnum sex wave year mth
	replace esr=. if esr==-1 | esr==0
	gen nlf = esr==8	
	gen work = esr==1 | esr==2		
	gen nowork = esr==3 | esr==4 | esr==5 | esr==6 | esr==7 | esr==8
	gen unempl = esr==3 | esr==5 | esr==6 | esr==7
	for any nlf work nowork unempl: replace X=. if esr==.
	replace work=. if work==1 & (ws1amt==0 | ws1amt==.)					// drop self-employed
	
	** Create job separation variable
	sort suid pnum sex order
	
	* Months since job loss variable
	replace enddate1=. if enddate1==-1 
	replace enddate1=. if whystop1!=1 & whystop1!=9 & whystop1!=10 & whystop1!=13    // only for involuntary losers

	// enddate1 is split as a yyyymmdd number into year, month and day
	generate yr_loss = int(enddate1/10000)
	generate mth_loss = int((enddate1 - yr_loss*10000)/100)
	generate d_loss = enddate1 - yr_loss*10000 - mth_loss*100
	gen ym_loss = ym(yr_loss, mth_loss)
	sort suid pnum sex order

	* Indicator for whether month of job loss is current month
	gen ym_now = ym(year, mth)
	gen loss = ym_loss==ym_now
	bysort suid pnum sex: egen num_losses=total(loss)
	// order_loss = position of the first loss month within the person (9999 is a placeholder for none)
	gen temp_order_loss = 9999
	replace temp_order_loss=order if loss==1
	bysort suid pnum sex: egen order_loss=min(temp_order_loss)
	format enddate1 %10.0f  

	replace order_loss=. if order_loss==9999
	// Months relative to the first job loss, censored at -18 and +30
	gen month_reljl = order-order_loss
	replace month_reljl=. if order_loss==.
	replace month_reljl=-18 if month_reljl<=-18 & month_reljl!=.
	replace month_reljl=30 if month_reljl>=30 & month_reljl!=.
	gen afterloss = month_reljl>0 & month_reljl!=.
	replace afterloss=. if month_reljl==.
	// Keep persons with at least one observed job loss (full variable name, no abbreviation)
	keep if num_losses>0


	*Other eliminations
	*2. Drop repeats
	sort suid pnum wave refmth
	drop if suid==suid[_n-1] & pnum==pnum[_n-1] & panel==panel[_n-1] & wave==wave[_n-1] & refmth==refmth[_n-1]

	*3. Generate variable that will count month in sample and drop any second interviews 
	sort suid pnum sex order
	gen count=1
	replace count=count[_n-1]+1 if suid==suid[_n-1] & pnum==pnum[_n-1] & ((mth==(mth[_n-1]+1) & ///
		year==year[_n-1]) | (year==(year[_n-1]+1) & mth==1)) in 2/l
	// index = 1 on a person's first run of consecutive months, 2 on any later run, which is dropped
	gen index=0 
	replace index=1 if count==1 
	replace index=2 if count==1 & suid==suid[_n-1] & pnum==pnum[_n-1] 
	qui by suid pnum: replace index=index[_n-1] if index==0 
	drop if index>1 
 	label var count "month in panel" 

	* OLD EXCLUSIONS from JACULLEN
	*4. getting rid of anyone not there for at least 3 months
	egen temp=sum(index), by (suid pnum sex) 
	gen mthpres3 = temp>=3
	drop if mthpres3==0
	drop index

	*5. Now sorting according to month and locating first 3 consecutive months where individual is 
		//work, defined as esr = 1|2, ws1amt>0, and no selfemp. 
	// NOTE(review): the condition below uses esr==1 and ws1amt>=0, which is wider on
	// earnings and narrower on esr than the description above -- confirm which is intended.
	gen mthwrk3=0
	replace mthwrk3=1 if esr==1 & ws1amt>=0 & ws1amt~=.
	
	sort suid pnum sex order
	// Running count of consecutive working months within a person (full variable name, no abbreviation)
	replace mthwrk3=mthwrk3[_n-1]+1 if suid==suid[_n-1] & pnum==pnum[_n-1] & mthwrk3[_n-1]>0 & ///
		mthwrk3>0 in 2/l
	qui by suid pnum: replace mthwrk3=0 if mthwrk3==1 & mthwrk3[_n+2]~=3
	qui by suid pnum: replace mthwrk3=0 if mthwrk3==2 & mthwrk3[_n+1]~=3
	qui by suid pnum: replace mthwrk3=0 if mthwrk3~=1
	replace mthwrk3=mthwrk3[_n-1] if mthwrk3[_n-1]>0 & suid==suid[_n-1] & pnum==pnum[_n-1] in 2/l
	drop if mthwrk3==0
	
	***GENERATE NEW VARIABLES
	* Months in sample
	gen mark=1
	by suid pnum: egen mthsinsamp = sum(mark)
	drop mark

	* Kids
	rename nkidshl kids
	replace kids=4 if kids>4

	* Education
	// Recode the higrade category codes into the ed values assigned below
	gen ed=higrade
	replace ed=1 if ed==31
	replace ed=3 if ed==32
	replace ed=5 if ed==33
	replace ed=7 if ed==34
	replace ed=ed-26 if ed>34 & ed<=38
	replace ed=12 if ed==39 | ed==40
	replace ed=13 if ed>=41 & ed<=43
	replace ed=16 if ed==44
	replace ed=18 if ed>44
	drop higrade

	* Marital dummy and other indicators
	gen mardum=0 
	replace mardum=1 if ms==1 | ms==2 
	gen white=1
	replace white=0 if race>1 
	gen seamth=0
	replace seamth=1 if refmth==4 
	// NOTE(review): this lag is not taken within person, so the first row of a person reads
	// the previous person's hours; fulltime is not in the keep list at the end of this loop.
	gen fulltime = uhours1[_n-1]>35

	* Generate calendar qtr indicator
	gen calqtr=0
	replace calqtr=1 if mth>=1 & mth<=3
	replace calqtr=2 if mth>=4 & mth<=6
	replace calqtr=3 if mth>=7 & mth<=9
	replace calqtr=4 if mth>=10 & mth<=12
 	label var calqtr "calendar quarter"

	* Generate quarter in sample (calendar quarter #1-8 for each obs so we can collapse)
	replace count=1
	sort suid pnum sex order
	replace count=count[_n-1]+1 if suid==suid[_n-1] & pnum==pnum[_n-1] in 2/l

	// The first quarter is numbered 1 when all three of its months are observed, 0 otherwise
	gen mark=1
	egen check=sum(mark), by (suid pnum panel year calqtr)
	gen qtr=1 if count==1
	replace qtr=0 if count==1 & check<3
	drop check mark
	
	// mark flags the first row of every calendar quarter; qtr is incremented across the flagged rows
	gen mark=0
	replace mark=1 if qtr~=.
	replace mark=1 if suid==suid[_n-1] & pnum==pnum[_n-1] & panel==panel[_n-1] & calqtr~=calqtr[_n-1] in 2/l

	sort suid pnum panel mark count
	qui by suid pnum panel: replace qtr=qtr[_n-1]+1 if mark==1 & mark[_n-1]==1
	drop mark
	sort suid pnum panel count
	// Fill qtr forward over the remaining months of each quarter
	replace qtr=qtr[_n-1] if qtr==.
 	label var qtr "quarter in sample"
	
	***CREATE WEEKLY INCOME PROFILE LOOKING FORWARD FOR FUTURE USE
	sort suid pnum sex order

	***CREATE QUARTERLY WAGE HISTORY AND DEFINE INPUTS FOR UI LAWS
		//-own earnings, total income spouse total income, and family total income are all computed nominal wages
	*Note: use info only from ws1 for own earn because 2nd job is usually job gotten after unemp ends
	preserve
	egen qearn=sum(ws1amt+ws2amt), by (suid pnum qtr)
	egen ws1qearn=sum(ws1amt), by (suid pnum qtr)
	egen fqinc=sum(finc), by (suid pnum qtr)
	egen qwks = sum(wks), by (suid pnum qtr)

	* Generate 3-month sums if agent loses job before calendar quarter of info
	gen mark=0
	replace mark=1 if count<4
	egen temp1=sum(ws1amt+ws2amt), by (suid pnum panel mark)
	egen temp2=sum(finc), by (suid pnum panel mark)
	egen temp4=sum(wks), by (suid pnum panel mark)
	replace qearn=temp1 if qtr==0
	replace fqinc=temp2 if qtr==0
	replace qwks=temp4 if qtr==0
	drop temp1 temp2 temp4 mark count

	// One row per person-quarter; the quarterly sums are constant within each cell
	collapse (mean) qearn ws1qearn fqinc qwks, by (suid pnum qtr)

	* Generate lagged quarterly wages
	for num 1/5: qui by suid pnum: gen qearn_lX=qearn[_n-X] 
	for num 1/5: qui by suid pnum: gen ws1qearn_lX=ws1qearn[_n-X] 
	for num 1/5: qui by suid pnum: gen qwks_lX=qwks[_n-X] 

	* Compute base period wage, but ignore the lag when data limits it
	// Fewer than four lagged quarters available: the observed quarters are scaled up to a four-quarter total
	gen bpw=4*qearn_l1 if qtr<=2
	replace bpw=2*(qearn_l1+qearn_l2) if qtr==3
	replace bpw=(4/3)*(qearn_l1+qearn_l2+qearn_l3) if qtr==4
	replace bpw=(qearn_l1+qearn_l2+qearn_l3+qearn_l4) if qtr==5
	replace bpw=(qearn_l2+qearn_l3+qearn_l4+qearn_l5) if qtr>=6

	gen annwg=bpw if qtr<=4
	replace annwg=(qearn_l1+qearn_l2+qearn_l3+qearn_l4) if qtr>=5

	// Highest quarterly earnings among the available lags
	gen hq1w=qearn_l1 if qtr<=2
	replace hq1w=max(qearn_l1,qearn_l2) if qtr==3
	replace hq1w=max(qearn_l1,qearn_l2,qearn_l3) if qtr==4
	replace hq1w=max(qearn_l1,qearn_l2,qearn_l3,qearn_l4) if qtr>=5

	// Highest quarterly earnings strictly below hq1w (quarters equal to hq1w enter as zero)
	gen hq2w=qearn_l1 if qtr<=2
	replace hq2w=min(qearn_l1,qearn_l2) if qtr==3
	replace hq2w=max((qearn_l1<hq1w)*qearn_l1, (qearn_l2<hq1w)*qearn_l2, (qearn_l3<hq1w)*qearn_l3) if qtr==4
	replace hq2w=max((qearn_l1<hq1w)*qearn_l1, (qearn_l2<hq1w)*qearn_l2, (qearn_l3<hq1w)*qearn_l3, ///
		(qearn_l4<hq1w)*qearn_l4) if qtr>=5

	// Coefficient of variation of lagged quarterly earnings around bpw/4
	gen cv_earn=sqrt(((qearn_l1-bpw/4)^2+(qearn_l2-bpw/4)^2)/2)/(bpw/4) if qtr==3
	replace cv_earn=sqrt(((qearn_l1-bpw/4)^2+(qearn_l2-bpw/4)^2+(qearn_l3-bpw/4)^2)/3)/(bpw/4) if qtr==4
	replace cv_earn=sqrt(((qearn_l1-bpw/4)^2+(qearn_l2-bpw/4)^2+(qearn_l3-bpw/4)^2+(qearn_l4-bpw/4)^2)/4) ///
		/(bpw/4) if qtr>=5

	* Generate annual family info
	for num 1/4: qui by suid pnum: gen fqinc_lX=fqinc[_n-X] 
	gen fanninc=4*fqinc_l1 if qtr<=2
	replace fanninc=2*(fqinc_l1+fqinc_l2) if qtr==3
	replace fanninc=(4/3)*(fqinc_l1+fqinc_l2+fqinc_l3) if qtr==4
	replace fanninc=(fqinc_l1+fqinc_l2+fqinc_l3+fqinc_l4) if qtr>=5

	keep suid pnum qtr fanninc qearn_l* ws1qearn_l* qwks_l* bpw hq1w hq2w annwg cv_earn
	tempfile unemp`syear'
	save `unemp`syear'', replace
	restore
	
	* Merge back
	// The quarterly file has one row per suid-pnum-qtr (it comes from the collapse above),
	// so this is a many-to-one merge from the monthly data
	sort suid pnum qtr
	merge m:1 suid pnum qtr using `unemp`syear''
	tabulate _merge
	drop _merge
	
	capture qui destring suid, replace
	capture qui destring addid, replace

	*** COLLAPSE dataset into spells -- note that all cross-sectional data, e.g. wave, refers to date of job sep
	keep if loss==1

	rename uhours1 uhours
	
	// month_reljl is listed by its full name only; the previous list also carried the
	// token month, which resolved to the same variable through abbreviation
	keep suid pnum panel wave mth year refmth qtr statefip p5wgt age sex ethncty race mardum ed htype ///
		ws1ind ws1occ kids qearn_l* cv_earn bpw hq1w hq2w annwg fanninc uhour* qwks_l* loss order_loss month_reljl 

	order suid pnum panel wave mth year refmth qtr statefip p5wgt age sex ethncty mardum ed htype ///
		ws1ind ws1occ kids qearn_l* cv_earn bpw hq1w hq2w annwg fanninc
	
	compress
	tempfile ui`syear'
	save `ui`syear'', replace
}

* Stack the job-loss files of the four panels
use `ui1996', clear
foreach panelyr in 2001 2004 2008 {
	append using `ui`panelyr''
}
replace year = year+1900 if year<1900

* Sample restrictions: statefip below 60, ages 18-60, non-missing marital dummy
* and kids, years 1993-2013, and annual wage different from zero (outliers)
keep if statefip<60 & inrange(age, 18, 60) & mardum!=. & kids!=. & inrange(year, 1993, 2013) & annwg!=0
gen obsid = _n

*** Fix demographics
* One race variable: 1 white, 2 black, 3 hispanic, 4 other
gen hisp  = inrange(ethncty, 14, 19)
gen white = (race==1) & !hisp
gen black = (race==2) & !hisp
gen other = (race>2) & !hisp
drop if race==.
drop race
gen race = cond(other==1, 4, cond(hisp==1, 3, cond(black==1, 2, cond(white==1, 1, .))))

* Sex
drop if sex==.
gen female = (sex==1)
drop sex

* Age groups: 1 = 18-24, then five-year bands, 8 = 55 and over
gen agegr = .
replace agegr = 1 if age>=18 & age<25
forvalues g = 2/8 {
	replace agegr = `g' if age>=15+5*`g' & age<20+5*`g'
}
replace agegr = 8 if age>=55

* Education groups: 1 = up to 11, 2 = 12, 3 = 13-15, 4 = 16 and over
replace ed = . if ed==-1
drop if ed==.
gen edgr = cond(ed<=11, 1, cond(ed==12, 2, cond(inrange(ed, 13, 15), 3, cond(ed>=16, 4, .))))
tab edgr, g(educ)

* Marriage status
gen married = mardum

* Attach the CPI. Rows that exist only in the CPI file are kept here (flagged by cpim),
* except those after 2015
merge m:1 year using "${rawdata}/StateYear/cpi_6717.dta", gen(cpim)
drop if year>2015

save "${regdata}/uireplrates.dta", replace


			*************************************************
***************** 	CREATE INSTRUMENT_SIPP_XX DATASETS  ***********************
			*************************************************
// Notes: this program was last executed in June 2018 for the main replacement rates, 
// and in October 2018 for the replacement rates by gender and age. 
// Michigan replacement rates use TAXSIM, so any change to TAXSIM will affect the Michigan UI measures.
use "${regdata}/uireplrates.dta", clear


*** CPI relative to 237.017 (2015 values, no lags anymore)
gen annual_cpi = cpi/237.017

*** One constant CPI variable per year, plus the CPI of the observation's own year
gen cpi_trueyear = annual_cpi

* The yearly means are taken before the sample is cut to cpim==3 below, so every
* row present in the file for that year contributes to the mean
forvalues yr = 1990/2014 {
	quietly summarize annual_cpi if year==`yr'
	scalar annual_cpi_scalar = r(mean)
	gen cpi_year_`yr' = annual_cpi_scalar
}
gen cpi_year_2015 = 1

*** Keep rows matched to the CPI file, proper ages only
keep if cpim==3
keep if inrange(age, 18, 60)

*** Real annual wage
gen real_wage = annwg*237.017/cpi

*** Marital status variable for TAXSIM: 1 for unmarried (with or without kids), 2 for married.
	// Taxsim27: head of household is automatically determined by taxsim9
gen mstat = 1 if mardum==0 & kids>=0
replace mstat = 2 if mardum==1

* Dependents and state inputs
gen depx = kids
gen state = 0

* Missing wage inputs are set to zero
foreach v of varlist annwg bpw hq1w hq2w qearn_l1 qearn_l2 {
	replace `v' = 0 if `v'==.
}

************************************************
**** LOOP FOR EACH YEAR
************************************************
* Placeholders that are filled in the year loop, and a marker for the original rows
foreach v in nndate tao pwages {
	gen `v' = 0
}
gen orig = 1

// Simulated UI replacement rates. For every law year and every state code 1-51 the
// same job-loss sample is run through the year-state UI rules (lawassign_sipp.do
// and elig_sipp.do), then averaged by year-state-kids with four different sets of
// additional by-variables. The sample in memory is restored after every year.
forvalues y = 1990/2015 { 	
	preserve
	display ""
	display "THIS IS LOOP FOR YEAR `y'"

	replace year = `y'-1  // taxes are calculated according to the previous year tax rules
	
	* For UI Calculator 
	// Rescale the wage inputs from the CPI of the observation's own year to the CPI of the loop year
	for any annwg bpw hq1w hq2w qearn_l1 qearn_l2: replace X=X*(cpi_year_`y'/cpi_trueyear)
	
	* For TAXSIM Calculator
	// NOTE(review): arc9 is not installed in the visible setup section (only taxsim9 is) --
	// presumably an archived TAXSIM release obtained separately; confirm.
	// fiitax is not created in this file -- presumably returned by the TAXSIM call; confirm.
	replace pwages = annwg
	//quietly taxsim9, replace
	arc9, version(180628) replace	// New version of taxsim9 changes the results
	replace tao = fiitax/annwg
	replace tao=0 if tao==.
	
	replace year = `y'		// back to normal year 
	
	// Snapshot of the rescaled sample, reloaded for every state below
	tempfile year_`y'
	save `year_`y'', replace
 
 
	**** LOOP FOR EACH STATE
	forvalues n = 1/51 {
		display ""
		display "THIS IS LOOP FOR STATE `n'"
		use `year_`y'', clear
		// Assign every observation to the state being simulated
		gen st =`n'
		drop statefip
		
		// Date as year*10000 + 100
		replace nndate=year*10000+1*100
		
		*** Use the calculator based on wages in the year before unempl
		merge m:1 year st kids using "${rawdata}/StateYear/uilaws_updated_sim.dta", gen(uilawsm)	
		keep if uilawsm==3
		drop uilawsm
		
		// Add state weekly wages; orig equals 1 only on rows of the sample in memory,
		// so rows that exist only in the wage file are discarded
		merge m:1 st year using "${rawdata}/StateYear/state_weeklywages.dta", gen(stwages)
		keep if orig==1	
		drop stwages
		
		// use the lawassign do file
		gen children=kids
		quietly do "${wkdir}/lawassign_sipp.do"
		display ""

		// use the eligibility do file
		quietly do "${wkdir}/elig_sipp.do"
		
		*** Replacement rates
		// wba and elig are not created in this file -- presumably by the two do-files above; confirm.
		// Replacement rate = weekly benefit over high-quarter earnings divided by 13
		replace wba=0 if elig==0
		gen sim_repl_sipp=wba/(hq1w/13)

		tempfile temp_data
		save `temp_data', replace
		
		*** Collapse data at state-year-kids level, no control
		collapse (mean) sim_repl_sipp wba (rawsum) p5wgt [pw=p5wgt], by(year st kids)

		tempfile sims_`n'_`y'_y   		
		save `sims_`n'_`y'_y', replace 
		
		*** Collapse data at state-year-kids level, adding gender
		use `temp_data', clear
		collapse (mean) sim_repl_sipp wba (rawsum) p5wgt [pw=p5wgt], by(year st kids female)

		tempfile sims_`n'_`y'_gen   		
		save `sims_`n'_`y'_gen', replace 	
		
		*** Collapse data at state-year-kids level, adding age
		use `temp_data', clear
		collapse (mean) sim_repl_sipp wba (rawsum) p5wgt [pw=p5wgt], by(year st kids agegr)

		tempfile sims_`n'_`y'_age  		
		save `sims_`n'_`y'_age', replace 	
		
		*** Collapse data at state-year-kids level, adding gender and age
		use `temp_data', clear
		collapse (mean) sim_repl_sipp wba (rawsum) p5wgt [pw=p5wgt], by(year st kids female agegr)

		tempfile sims_`n'_`y'_agend   		
		save `sims_`n'_`y'_agend', replace 	
	}
	
	
	**** Append all states together
	// One file per year for each of the four aggregation levels
	foreach c in y gen age agend {
		use `sims_1_`y'_`c'', clear
		forvalues n = 2/51 {
			append using `sims_`n'_`y'_`c''
		}
		
		tempfile sims_`y'_`c' 
		save `sims_`y'_`c'', replace
	}

	// Bring back the unmodified sample for the next year
	restore
}


************************************************
*** Append the state-year-kids data together
************************************************

* For each aggregation level, stack the yearly simulation files, label the
* variables, attach the state codes and save the instrument dataset
foreach spec in y gen age agend {
	* All years, starting from 1990
	use `sims_1990_`spec'', clear
	forvalues yr = 1991/2015 {
		append using `sims_`yr'_`spec''
	}

	label variable sim_repl_sipp "Sim. R-rate"
	label variable wba "WBA"

	* Attach the state codes
	sort st year
	merge m:1 st using "${rawdata}/StateYear/statecodes_all.dta", nogenerate

	save "${regdata}/instrument_sipp_`spec'.dta", replace
}


				*******************************************
*******************		MERGE STATE LEVEL CONTROLS		**********************
				*******************************************

**********************
*** PREPARE DATA
**********************
*** Prepare Simulate Replacement Rates
* Start from the year-state-kids instrument file and keep cells that also exist in the UI laws file
use "${regdata}/instrument_sipp_y.dta", clear	
merge 1:1 year statefip kids using "${rawdata}/StateYear/uilaws_updated_sim.dta", keep(match) nogenerate

* Add population in CPS; only matched cells are kept (1993 onwards)
merge 1:1 year statefip kids using  "${rawdata}/StateYear/ipums_urate.dta", keepusing(weight_pop) keep(match) nogenerate
replace weight_pop = weight_pop/12		// monthly data, so each US person is counted 12 times

* Largest max and smallest min within each state-year
bysort statefip year: egen st_max=max(max)
bysort statefip year: egen st_min=min(min)

* Collapse to state-year (the data were state-year-kids before), weighting by p5wgt
collapse (mean) sim_repl_sipp wba st_max max  st_min min st [aw=p5wgt], by(statefip year)
foreach v in sim_repl_sipp wba {
	rename `v' st_`v'
}
foreach v in min max {
	rename `v' st_ave_`v'
}
keep if inrange(year, 1990, 2015)
tempfile replrates
save `replrates', replace

*** Prepare UI benefits data			
use "${rawdata}/StateYear/uibens_7113.dta", clear

* Indicators for extended and emergency benefits being paid (positive and not missing)
gen st_extended = (fedstebbenefitspaid!=.) & (fedstebbenefitspaid>0)
gen fed_emerg   = (emerbenefitspaid!=.) & (emerbenefitspaid>0)
keep if inrange(year, 1990, 2015)
rename month intmo

tempfile uibens
save `uibens', replace

*** Prepare children's Medicaid/CHIP (saved to the tempfile medicaid)
* The children's thresholds and the pregnant-women thresholds are stacked under
* one variable name and then averaged within state-year.
* Note: -use- only accepts the option -clear- (not -replace-) to discard the
* data currently in memory.
use "${rawdata}/StateYear/st_child_med_8814.dta", clear
* Temporarily give the child threshold the name used in the file appended next
rename childthresh pregnthresh
append using "${rawdata}/StateYear/st_preg_med_8714.dta"
* Rows with no age get the code -1
replace age=-1 if age==.
rename pregnthresh medthresh

* Mean threshold over all stacked rows of a state-year
collapse (mean) medthresh, by(stfips year)
keep if year>=1990 & year<=2015
tempfile medicaid
save `medicaid', replace

**********************
*** CLEAN UKCPR DATA
**********************
*** Input unemployment rates
* Load the UKCPR welfare file and drop the state codes that are not used
use "${rawdata}/StateYear/ukcpr_welfare_8015.dta", clear
drop if statefip==43 | statefip>56

* Attach CPI by year to create real values; unmatched rows are discarded
merge m:1 year using "${rawdata}/StateYear/cpi_6717.dta", keep(match) nogenerate

* Unemployment rate
rename unemploymentrate urate
replace urate = urate/100			// Divide by 100 to make the coefficients larger

* Epop (note this is all population)
gen epop = employment / population

* Minimum wage: the larger of the federal and the state minimum
egen minwage = rowmax(federalmin statemin)

* AFDC Max
rename afdctanfbenefitfor4personfamily afdcmax4

* State GDP
rename grossstatepro gsp

* Transform in real values
* gsp: millions of real 2015 dollars; afdcmax4 and minwage: 100s of real 2015 dollars
replace gsp = gsp*237.017/cpi
foreach v in afdcmax4 minwage {
	replace `v' = `v'*2.37017/cpi
}
keep urate epop gsp year statefip population minwage afdcmax4 cpi

* Create lags: a lag is filled only when the earlier row is exactly that many years back
forvalues k = 1/2 {
	bysort statefip (year): gen urate_lag`k' = urate[_n-`k'] if year==year[_n-`k']+`k'
}
keep if inrange(year, 1990, 2015)

**********************
*** MERGE GENEROSITY DATA
**********************
*** Merge simulated replacement rates
merge 1:1 statefip year using `replrates', nogenerate
rename statefip stfips
* UI maxima and minima in 100s of real 2015 dollars
foreach v in st_max st_min st_ave_max st_ave_min {
	replace `v' = `v'*2.37017/cpi
}

*** Medicaid/SCHIP = Average for pregnant women and children 0-16
merge 1:1 stfips year using `medicaid', nogenerate
keep if inrange(year, 1990, 2015)

*** Welfare reform
merge 1:1 stfips year using "${rawdata}/StateYear/st_welfreform_8813.dta", nogenerate
keep if inrange(year, 1990, 2015)
* NOTE(review): the indicator is forced to 0 from 2013 onwards - confirm this is intended
replace reform = 0 if year>=2013

*** EITC
rename stfips statefip
merge m:1 statefip year using "${rawdata}/StateYear/eitc_vals_8415.dta", nogenerate
keep if inrange(year, 1990, 2015)

**********************
*** MERGE SPENDING DATA
**********************
*** Add welfare spending here (data in thousands)
* Rows found only in the spending file are not kept
merge 1:1 statefip year using "${rawdata}/StateYear/welf_spend_6815.dta", keepusing(snap eitc tanf medicaid retdi) keep(master match) nogenerate
keep if inrange(year, 1990, 2015)


* CPI for all spending, then spending per person
foreach prog in snap eitc tanf medicaid retdi {
	replace `prog'_spend = `prog'_spend*237.017/cpi
	gen `prog'_pop = `prog'_spend/population
}

*** Add UI data (in Thousands)
* _merge is kept for now; rows found only in the UI file are dropped after the lags
merge 1:1 statefip year using "${rawdata}/StateYear/eta_uifunds_3816.dta"
drop ui_wage ui_aww ui_rr ui_high_co state
sort statefip year
keep if inrange(year, 1990, 2015)
rename ui_benefits_paid ui_benefits

* Fix UI: real values, then per person
foreach v in ui_benefits ui_net_reserves {
	replace `v' = `v'*237.017/cpi
	gen `v'_pop = `v'/population
}

* Lags, taken along the year ordering within state
forvalues k = 1/2 {
	bysort statefip (year): gen ui_netres_lag`k'_pop = ui_net_reserves_pop[_n-`k']
}
drop if _merge==2
keep if inrange(year, 1990, 2015)
drop _merge

*** Merge wages
merge 1:1 statefip year using "${rawdata}/StateYear/state_weeklywages.dta", gen(stwages)
replace avweekwage = avweekwage*2.37017/cpi

**********************
*** LABEL ALL NEW VARIABLES
**********************
* Attach variable labels to the state-year controls and save them as state_data.dta.
* The backslashes in front of the dollar sign keep Stata from reading it as a global macro.
label var cpi "CPI"

forvalues k = 1/2 {
	label var urate_lag`k' "`k' Lag Unemployment Rate"
}
label var urate "Unemployment Rate"
label var avweekwage "Average Weekly Wage (\\$2015, 100s)"
label var population "Population"
label var epop "Employment Rate"
label var gsp "Gross State Product (\\$2015, Millions)"

label var st_max "St Max UI Benefit (\\$2015, 100s)"
label var st_min "St Min UI Benefit (\\$2015, 100s)"
label var st_ave_max "St Ave Max UI Benefit (\\$2015, 100s)"
label var st_ave_min "St Ave Min UI Benefit (\\$2015, 100s)"
* The simulated replacement rate was renamed to st_sim_repl_sipp when the
* replacement-rate file was collapsed; use the full name instead of relying on
* variable-name abbreviation, which breaks under -set varabbrev off- or when
* another variable shares the prefix.
label var st_sim_repl_sipp "State Simulated Replacement Rate"
label var st_wba "St Simulated WBA (\\$2015, 100s)"

label var ui_benefits "State UI Benefits"
label var ui_net_reserves "State UI Net Reserves"
label var ui_benefits_pop "State UI Benefits/Population"
label var ui_net_reserves_pop "State UI Net Reserves/Population"
label var ui_netres_lag1_pop "1 Lag (UI Net Reserves/Population)"
label var ui_netres_lag2_pop "2 Lag (UI Net Reserves/Population)"

label var medthresh "Mean Medicaid Pov Threshold"
label var reform "Welfare Reform Indicator"
label var minwage "Minimum Wage (\\$2015, 100s)"
label var eitc_val "State EITC (Percent Federal)"
label var afdcmax4 "AFDC Max Benefits (\\$2015, 100s)"

label var snap_pop "FS Spending/Population"
label var tanf_pop "AFDC Spending/Population"
label var medicaid_pop "Medicaid Spending/Population"
label var retdi_pop "SS Spending/Population"
label var eitc_pop "EITC Spending/Population"

* The wage merge just before this section applies no year filter, so rows
* before 1990 that come only from the wage file are removed here
drop if year<1990
save "${regdata}/state_data.dta", replace


			******************************************************
***************** 		CREATE UNEMPLOYMENT VARIABLES		***********************
			******************************************************
 
* For each SIPP panel: load the panel tempfile, build labour-force status,
* job-separation, job-loss and spell variables, apply the sample flags,
* build the quarterly wage history used as UI-calculator inputs, and save a
* trimmed person-month file in the tempfile spell_<panel>.
*
* Changes relative to the earlier version of this loop:
*   - the consecutive-months counter is written to mthwrk3 by its full name
*     (it used to be reached only through the abbreviation mthwrk);
*   - the wage-history merge uses the modern -merge m:1- syntax instead of the
*     deprecated sort-and-merge form;
*   - a duplicated variable name was removed from the final -keep- list.
foreach syear in 1993 1996 2001 2004 2008 {
	di ""
	di "THIS LOOP IS FOR YEAR `syear'"
	di ""

	* Load the panel extract; the identifiers are converted to numeric when they are strings
	use `temp`syear'', clear
	capture qui destring suid, replace
	capture qui destring addid, replace

	* Fix some variables: two-digit years become four-digit years
	for any panel year: replace X = X+1900 if X<1900
	if `syear'==1993 {
		* These variables are created empty for the 1993 panel so later code runs for every panel
		for any lookwork_yn layoff_yn numjobs numbus stdate1 enddate1 stlemp1_yn eeno1: gen X=.
	}

	*** Create samples of interest
	sort suid pnum sex wave year mth

	* First see to drop those that by mistake seem to be same individuals:
	* a person is dropped entirely if the birth year ever jumps by more than one
	* between consecutive rows
	bysort suid pnum sex: gen diff_byear=brthyr-brthyr[_n+1]
	bysort suid pnum sex: gen diff_age=age-age[_n+1]
	gen mistake=1
	bysort suid pnum sex: replace mistake=0 if (diff_byear>=-1 & diff_byear<=1) | diff_byear==.
	bysort suid pnum sex: egen temp=max(mistake)
	replace mistake=temp
	drop if mistake==1
	drop temp

	* Create order variable (row counter within person)
	sort suid pnum sex wave year mth
	bysort suid pnum sex: gen order=_n

	* Create LFP variables. Categories are: 1 job all month; 2 job all month, absent 1+ weeks (no layoff),
	// 3 job all month, absent 1+ weeks (layoff); 4 job 1+ weeks (no layoff); 5 job 1+ weeks (layoff); 6 no job (layoff);
	// 7 no job (some layoff), 8 no job (no layoff)
	sort suid pnum sex wave year mth
	replace esr=. if esr==-1 | esr==0
	gen nlf = esr==8
	gen work = esr==1 | esr==2
	gen nowork = esr==3 | esr==4 | esr==5 | esr==6 | esr==7 | esr==8
	gen unempl = esr==3 | esr==5 | esr==6 | esr==7
	for any nlf work nowork unempl: replace X=. if esr==.

	* Self-employed: owns a business, works without wage and salary income on job 1,
	* or (panels before 1996) has selfemp equal to 2
	gen self_employed = numbus>=1 & numbus!=.
	replace self_employed=. if esr==.
	replace self_employed = 1 if work==1 & (ws1amt==0 | ws1amt==.)       // flag as self-employed
	if `syear'<1996 replace self_employed = 1 if work==1 & selfemp==2

	by suid pnum sex: egen perc_semployed=mean(self_employed)

	** Create job separation variable: working last row, not working this row
	sort suid pnum sex order
	gen jobsep=0
	by suid pnum sex: replace jobsep = work[_n-1]==1 & nowork==1
	by suid pnum sex: egen jobsep_num=sum(jobsep)

	** Create job loss variable: separation and at least one week on layoff/looking for work
	gen jobloss=0
	by suid pnum sex: replace jobloss = work[_n-1]==1 & unempl==1 & self_employed[_n-1]!=1

	by suid pnum sex: egen jobloss_num=sum(jobloss)
	gen jobloser=jobloss_num>0

	* Create spell number: job losses are numbered within person, the number is
	* carried forward over the following unemployed rows and backward over the
	* preceding run of working rows
	gen spell=.
	sort suid pnum sex jobloss order
	by suid pnum sex jobloss: replace spell = _n if jobloss==1
	sort suid pnum sex order
	by suid pnum sex: replace spell=spell[_n-1] if jobloss!=1 & unempl==1
	by suid pnum sex: replace spell=spell[_n+1] if jobloss[_n+1]==1 & work==1
	* Each pass pushes the spell number one more working row back
	forvalues i=1/60 {
		qui replace spell = spell[_n+1] if suid==suid[_n+1] & pnum==pnum[_n+1] & ///
			order==order[_n+1]-1 & work==1 & work[_n+1]==1 in 1/l

	}
	replace spell=0 if spell==. & jobloser==0   // 0 means never spell, missing means spell has ended

	* Create variables of lagged work status
	sort suid pnum sex order
	for any 1 2 3 4 5 6 7 8 9 10 11: bysort suid pnum sex: gen esr_lX = esr[_n-X]

	*** Other eliminations
	* Drop repeats
	sort suid pnum wave refmth
	drop if suid==suid[_n-1] & pnum==pnum[_n-1] & panel==panel[_n-1] & wave==wave[_n-1] & refmth==refmth[_n-1]

	*** OLD EXCLUSIONS from JACULLEN
	* Getting rid of anyone not there for at least 3 months
	gen index=1
	egen temp=sum(index), by (suid pnum sex)
	gen mthpres3 = temp>=3
	drop index temp

	* Now sorting according to month and locating first 3 consecutive months where individual is
	// work, defined as esr = 1, ws1amt>0, and no selfemp.
	gen mthwrk3=0
	if `syear'>=1996 replace mthwrk3=1 if esr==1 & ws1amt>=0 & ws1amt~=.
	else replace mthwrk3=1 if esr==1 & ws1amt>=0 & ws1amt~=. & selfemp~=2

	* Running count of consecutive qualifying months; written to mthwrk3 by its full name
	sort suid pnum sex order
	replace mthwrk3=mthwrk3[_n-1]+1 if suid==suid[_n-1] & pnum==pnum[_n-1] & mthwrk3[_n-1]>0 & ///
		 mthwrk3>0 in 2/l
	* Keep a 1 only on a row that starts a run reaching 3, then carry the flag forward
	qui by suid pnum: replace mthwrk3=0 if mthwrk3==1 & mthwrk3[_n+2]~=3
	qui by suid pnum: replace mthwrk3=0 if mthwrk3==2 & mthwrk3[_n+1]~=3
	qui by suid pnum: replace mthwrk3=0 if mthwrk3~=1
	replace mthwrk3=mthwrk3[_n-1] if mthwrk3[_n-1]>0 & suid==suid[_n-1] & pnum==pnum[_n-1] in 2/l

	*** JOB SEPARATION VARIABLES
	* Define UI receipt (during any point in spell) variable
	sort suid pnum sex order
	gen ui_yn2 = (uiamt>0 & uiamt!=.) | (suid==suid[_n+1] & pnum==pnum[_n+1] & unempl==1 & ///
		 unempl[_n+1]==1 & spell==spell[_n+1] & uiamt[_n+1]>0 & uiamt[_n+1]!=.)
	sort suid pnum sex spell order
	* NOTE(review): ui_yn is either a variable of the panel extract or an abbreviation
	* of ui_yn2 created just above - confirm which one is meant
	by suid pnum sex spell: egen uireceipt=sum(ui_yn)
	replace uireceipt=uireceipt>0

	* Identify temporary layoffs (by esr==3 at ANY point in spell)
	by suid pnum sex spell: egen templayoff=sum(esr==3)
	replace templayoff=templayoff>0

	* Identify people that were laid off (whystop1 equal to 1 at any point in the spell)
	gen layoff=0
	replace layoff=1 if whystop1==1
	by suid pnum sex spell: egen temp=max(layoff)
	replace layoff=temp
	drop temp

	* Identify people that retired/school/housewife; the whystop1 codes differ before and from 1996
	gen retother=0
	replace retother=1 if (whystop1==2 | whystop1==6) & `syear'<1996
	replace retother=1 if (whystop1==2 | whystop1==3 | whystop1==4 | whystop1==7 | whystop1==14) & ///
		 `syear'>=1996
	by suid pnum sex spell: egen temp=max(retother)
	replace retother=temp
	drop temp

	*** Merge health data (rows found only in the health file are dropped)
	sort suid addid pnum panel wave mth
	merge m:1 suid addid pnum panel wave using "${rawdata}/SIPP/Data/Raw/SIPP/Health/ready_health.dta", gen(mhealth)
	sort suid addid pnum panel wave mth
	drop if mhealth==2

	*** GENERATE NEW VARIABLES
	* Months in sample
	sort suid pnum sex wave year mth
	gen mark=1
	by suid pnum: egen mthsinsamp = sum(mark)
	drop mark

	* Kids (top-coded at 4)
	rename nkidshl kids
	replace kids=4 if kids>4 & kids!=.

	* Education: recode higrade into ed; the coding differs before and from 1996
	if `syear'>=1996 {
		gen ed=higrade
		replace ed=1 if ed==31
		replace ed=3 if ed==32
		replace ed=5 if ed==33
		replace ed=7 if ed==34
		replace ed=ed-26 if ed>34 & ed<=38
		replace ed=12 if ed==39 | ed==40
		replace ed=13 if ed>=41 & ed<=43
		replace ed=16 if ed==44
		replace ed=18 if ed>44
		drop higrade
	}

	else {
		replace higrade=higrade-1 if grdcmpl==2 & higrade>0
		gen ed=higrade
		replace ed=12 if higrade==20
		replace ed=ed-8 if higrade>20
		drop grdcmpl
	}

	* Marital dummy and other indicators
	gen mardum=0
	replace mardum=1 if ms==1 | ms==2
	replace mardum=. if ms==.

	gen hisp = ethncty>=14 & ethncty<=19
	gen white = race==1 & hisp==0
	gen black = race==2 & hisp==0
	gen other = race>2 & hisp==0

	* NOTE(review): the lag below is not taken within person and treats missing hours
	* as above 35; fulltime is not in the final keep list, so it does not reach the saved file
	if `syear'>=1996 gen fulltime = uhours1[_n-1]>35
	else gen fulltime = uhours[_n-1]>35

	***************************************
	*** VARIABLES FOR UI CALCULATOR. Borrowed from Chetty
	***************************************
	* Generate calendar qtr indicator
	gen calqtr=0
	replace calqtr=1 if mth>=1 & mth<=3
	replace calqtr=2 if mth>=4 & mth<=6
	replace calqtr=3 if mth>=7 & mth<=9
	replace calqtr=4 if mth>=10 & mth<=12
	label var calqtr "calendar quarter"

	* Generate quarter in sample (calendar quarter #1-8 for each obs so we can collapse)
	* count is the running row number within person
	gen count=1
	sort suid pnum panel wave refmth
	replace count=count[_n-1]+1 if suid==suid[_n-1] & pnum==pnum[_n-1] in 2/l

	* The first observed quarter is numbered 1, or 0 when it has fewer than 3 rows
	gen mark=1
	egen check=sum(mark), by (suid pnum panel year calqtr)
	gen qtr=1 if count==1
	replace qtr=0 if count==1 & check<3
	drop check mark

	* Mark the first row of the person and every row where the calendar quarter changes
	gen mark=0
	replace mark=1 if qtr~=.
	replace mark=1 if suid==suid[_n-1] & pnum==pnum[_n-1] & panel==panel[_n-1] & calqtr~=calqtr[_n-1] in 2/l

	* Number the marked rows consecutively, then fill the number down to the other rows
	sort suid pnum panel mark count
	qui by suid pnum panel: replace qtr=qtr[_n-1]+1 if mark==1 & mark[_n-1]==1
	drop mark
	sort suid pnum panel count
	replace qtr=qtr[_n-1] if qtr==.
	label var qtr "quarter in sample"

	*** CREATE QUARTERLY WAGE HISTORY AND DEFINE INPUTS FOR UI LAWS
	* Own earnings, total income spouse total income, and family total income are all computed nominal wages
	* Note: use info only from ws1 for own earn because 2nd job is usually job gotten after unemp ends
	* The history is built on a copy of the data (preserve/restore) and merged back below
	preserve
	egen qearn=sum(ws1amt+ws2amt), by (suid pnum qtr)
	egen ws1qearn=sum(ws1amt), by (suid pnum qtr)
	egen fqinc=sum(finc), by (suid pnum qtr)
	egen qwks = sum(wks), by (suid pnum qtr)

	* Generate 3-month sums if agent loses job before calendar quarter of info
	gen mark=0
	replace mark=1 if count<4
	egen temp1=sum(ws1amt+ws2amt), by (suid pnum panel mark)
	egen temp2=sum(finc), by (suid pnum panel mark)
	egen temp4=sum(wks), by (suid pnum panel mark)
	replace qearn=temp1 if qtr==0
	replace fqinc=temp2 if qtr==0
	replace qwks=temp4 if qtr==0
	drop temp1 temp2 temp4 mark count

	* One row per person-quarter
	collapse (mean) qearn ws1qearn fqinc qwks, by (suid pnum qtr)

	* Generate lagged quarterly wages
	for num 1/5: qui by suid pnum: gen qearn_lX=qearn[_n-X]
	for num 1/5: qui by suid pnum: gen ws1qearn_lX=ws1qearn[_n-X]
	for num 1/5: qui by suid pnum: gen qwks_lX=qwks[_n-X]

	* Compute base period wage, but ignore the lag when data limits it
	* (with fewer than four lagged quarters the available ones are scaled up to four)
	gen bpw=4*qearn_l1 if qtr<=2
	replace bpw=2*(qearn_l1+qearn_l2) if qtr==3
	replace bpw=(4/3)*(qearn_l1+qearn_l2+qearn_l3) if qtr==4
	replace bpw=(qearn_l1+qearn_l2+qearn_l3+qearn_l4) if qtr==5
	replace bpw=(qearn_l2+qearn_l3+qearn_l4+qearn_l5) if qtr>=6
	gen annwg=bpw if qtr<=4
	replace annwg=(qearn_l1+qearn_l2+qearn_l3+qearn_l4) if qtr>=5

	* Highest lagged quarter of earnings
	gen hq1w=qearn_l1 if qtr<=2
	replace hq1w=max(qearn_l1,qearn_l2) if qtr==3
	replace hq1w=max(qearn_l1,qearn_l2,qearn_l3) if qtr==4
	replace hq1w=max(qearn_l1,qearn_l2,qearn_l3,qearn_l4) if qtr>=5

	* Second-highest lagged quarter: the largest value strictly below hq1w
	gen hq2w=qearn_l1 if qtr<=2
	replace hq2w=min(qearn_l1,qearn_l2) if qtr==3
	replace hq2w=max((qearn_l1<hq1w)*qearn_l1, (qearn_l2<hq1w)*qearn_l2, (qearn_l3<hq1w)*qearn_l3) if qtr==4
	replace hq2w=max((qearn_l1<hq1w)*qearn_l1, (qearn_l2<hq1w)*qearn_l2, (qearn_l3<hq1w)*qearn_l3, ///
		(qearn_l4<hq1w)*qearn_l4) if qtr>=5

	* Dispersion of the lagged quarterly earnings around bpw/4, relative to bpw/4
	gen cv_earn=sqrt(((qearn_l1-bpw/4)^2+(qearn_l2-bpw/4)^2)/2)/(bpw/4) if qtr==3
	replace cv_earn=sqrt(((qearn_l1-bpw/4)^2+(qearn_l2-bpw/4)^2+(qearn_l3-bpw/4)^2)/3)/(bpw/4) if qtr==4
	replace cv_earn=sqrt(((qearn_l1-bpw/4)^2+(qearn_l2-bpw/4)^2+(qearn_l3-bpw/4)^2+(qearn_l4-bpw/4)^2)/4) ///
		/(bpw/4) if qtr>=5

	* Generate annual family info
	for num 1/4: qui by suid pnum: gen fqinc_lX=fqinc[_n-X]
	gen fanninc=4*fqinc_l1 if qtr<=2
	replace fanninc=2*(fqinc_l1+fqinc_l2) if qtr==3
	replace fanninc=(4/3)*(fqinc_l1+fqinc_l2+fqinc_l3) if qtr==4
	replace fanninc=(fqinc_l1+fqinc_l2+fqinc_l3+fqinc_l4) if qtr>=5

	keep suid pnum qtr fanninc qearn_l* ws1qearn_l* qwks_l* bpw hq1w hq2w annwg cv_earn
	tempfile unemp`syear'
	save `unemp`syear'', replace
	restore

	* Merge back: the wage history has one row per person-quarter, so this is a
	* many-to-one match from the person-month rows
	sort suid pnum qtr
	merge m:1 suid pnum qtr using `unemp`syear''
	drop _merge

	sort suid pnum year mth

	* Harmonise names across panels before trimming the variable list
	if `syear'>=1996 gen othuiamt=.
	if `syear'>=1996 rename uhours1 uhours

	keep suid addid famid pnum entry panel wave mth year refmth qtr statefip p5wgt age sex white hisp black other ethncty  ///
		mardum ed hnp fnp kids hinc hearn finc fearn totinc earn qearn_l* bpw hq1w hq2w annwg cv_earn fanninc  ///
		spell mthwrk3 mthpres3 templayoff uhour* qwks_l* unempl nowork work nlf esr esr_l* layoff retother order  ///
		joblos* jobsep* whystop1 ui_yn* uireceipt uiamt* whynot* *_amt* ws1qearn_l* mhealth ehltstat ehospsta ehospnit ///
		epndad epnmom carecov medcode caidcov hiind hiown hisrc hipay stdate1 enddate1 stlemp1_yn eeno1 numbus ///
		edaysick evisdoc evisdent htype

	compress
	tempfile spell_`syear'
	save `spell_`syear'', replace
}


			******************************************************
***************** 		CREATE YEAR-STATE-KIDS DATA		***********************
			******************************************************
			
* Stack the five panel files and total the programme amounts by year, state and kids
use `spell_1993', clear
foreach pnl in 1996 2001 2004 2008 {
	append using `spell_`pnl''
}
keep if statefip<60 & statefip!=.

***********************
*** Make sure data is good
***********************
* Give the UI variables the same naming pattern as the other programmes
rename uiamt ui_amt
rename uiamt_flag ui_amt_flag

*** First figure out if better to use household or individual program amounts
* The _imp copies are taken before flagged amounts are zeroed out further down
foreach v in ss_amt ss_ch_amt ssi_amt ssi_ch_amt ssi_st_amt tanf_amt fs_amt ui_amt {
	gen `v'_imp = `v'
}
* SS and SSI have both adult and child variables. Combine them
foreach v in ssi_ch_amt ssi_st_amt {
	replace `v' = 0 if `v'==.
}
replace ss_amt_imp = ss_amt+ss_ch_amt
replace ssi_amt_imp = ssi_amt+ssi_ch_amt+ssi_st_amt

* Make sure the household variable I construct is the same as the SIPP one
foreach prog in ss tanf {
	bysort panel suid year mth: egen h2_`prog'_amt = total(`prog'_amt_imp)
	gen diff_`prog' = h2_`prog'_amt-h_`prog'_amt
	gen diff_`prog'_ind = h2_`prog'_amt!=h_`prog'_amt
	replace diff_`prog'_ind = . if h2_`prog'_amt==. | h_`prog'_amt==.
}
// Most times summing across indivs gives same answer as total household X.
sort panel suid year mth pnum age

// EK decides to drop all flagged amounts, and then re-add everything.
* Drop all imputed data: any amount whose flag is not 0 is set to 0
foreach prog in ss ss_ch ssi ssi_ch ssi_st fs tanf ui {
	replace `prog'_amt = 0 if `prog'_amt_flag!=0
}
replace ss_amt = ss_amt+ss_ch_amt
replace ssi_amt = ssi_amt+ssi_ch_amt+ssi_st_amt
drop *flag ss_ch_amt ssi_ch_am* ssi_st_am*

***********************
*** Collapse
***********************
* Weighted totals of every amount, plus the raw sum of the weights
collapse (sum) ss_am* ssi_am* fs_am* tanf_am* ui_am* (rawsum) p5wgt [pw=p5wgt], by(year state kids)
save "${rawdata}/SIPP/sipp_ysk_programs.dta", replace


				***********************************************
*******************		MERGE STATE-KIDS LEVEL CONTROLS		**********************
				***********************************************

* Population and CPI by state-year, saved to the tempfile ukpcr
use "${rawdata}/StateYear/ukcpr_welfare_8015.dta", clear
drop if statefip==43 | statefip>56

* Input CPI to create real values; unmatched rows are discarded
merge m:1 year using "${rawdata}/StateYear/cpi_6717.dta", keep(match) nogenerate

keep pop year statefip cpi
keep if inrange(year, 1990, 2015)
tempfile ukpcr
save `ukpcr', replace

*** Prepare simulated replacement rates
use "${regdata}/instrument_sipp_y.dta", clear
merge 1:1 year statefip kids using "${rawdata}/StateYear/uilaws_updated_sim.dta", keep(match) nogenerate

*** Add IPUMS (Monthly) CPS data
merge 1:1 year statefip kids using "${rawdata}/StateYear/ipums_urate.dta", keep(match) nogenerate
* monthly data, so each US person is counted 12 times
foreach w in weight_ur weight_pop {
	replace `w' = `w'/12
}

*** Add SIPP program data (rows found only in the SIPP file are not kept)
merge 1:1 year statefip kids using "${rawdata}/SIPP/sipp_ysk_programs.dta", keep(master match) nogenerate

*** Add March CPS data (rows found only in the CPS file are not kept)
merge 1:1 year statefip kids using "${rawdata}/StateYear/cpsdata.dta", keep(master match) nogenerate
rename afdc_spend tanf_spend

* Fix weights first
rename weight_mar_prog weight_cps
rename p5wgt weight_sipp_imp		// one unique weight for both imputed and not imputed
summarize weight_*

*** UKCPR data
sort year
merge m:1 year statefip using `ukpcr', nogenerate
keep if inrange(year, 1990, 2015)

* Yearly totals in millions, computed on a temporary copy of the data;
* nothing from this step is saved
preserve
collapse (rawsum) weight* fs* tanf* ss* ui*, by(year)
foreach tot of varlist weight_* fs* tanf* ss* ui* {
	replace `tot' = `tot'/1000000
}
// SIPP pop counts don't make sense. Not sure why. Use CPS numbers instead.
// Also notice that counts for UR are lower --> they represent labor force only.
restore

* Deflator applied to all spending below
local real "237.017/cpi"

* Fix CPS welfare spending here: real values, then per person
foreach prog in fs tanf ssi ss ui {
	replace `prog'_spend = `prog'_spend*`real'
	gen `prog'_cps_pop = `prog'_spend/population
}

* Fix SIPP welfare spending here, for the plain amounts and for their _imp copies
foreach prog in fs tanf ssi ss ui {
	replace `prog'_amt = `prog'_amt*`real'
	gen `prog'_sipp_pop = `prog'_amt/population

	replace `prog'_amt_imp = `prog'_amt_imp*`real'
	gen `prog'_sipp_imp_pop = `prog'_amt_imp/population
}

*** Add EITC data
* State EITC values by state-year
merge m:1 statefip year using "${rawdata}/StateYear/eitc_vals_8415.dta"
drop _merge
keep if year>=1993 & year<=2015

* EITC maximum benefit by year and number of kids
merge m:1 year kids using "${rawdata}/StateYear/eitc_maxben_8015.dta"
drop _merge
keep if year>=1993 & year<=2015
gen steitc_amount=eitcmax*eitc_val*0.237017/cpi

*** Add TANF Max Benefits
merge 1:1 year statefip kids using "${rawdata}/StateYear/afdc_benefits_famsize.dta"
drop _merge
keep if year>=1993 & year<=2015
replace maxben=maxben*0.237017/cpi

* Average weekly wages by state-year
merge m:1 statefip year using "${rawdata}/StateYear/state_weeklywages.dta", gen(stwages)
replace avweekwage=avweekwage*2.37017/cpi

*** Add UI data (in Thousands)
* _merge is kept until after the lags; rows found only in the UI file are dropped below
merge m:1 statefip year using "${rawdata}/StateYear/eta_uifunds_3816.dta"
drop ui_wage ui_aww ui_rr ui_high_co state
sort statefip year
keep if year>=1990 & year<=2015

* Fix UI: real values, then per person
for any ui_benefit ui_reserves ui_fed_loans ui_net_rese: replace X=X*237.017/cpi
for any ui_benefit ui_reserves ui_fed_loans ui_net_rese: replace X=X/(pop)

* Lags
* The data here have one row per state, year and kids cell, so the lag has to be
* taken within state AND kids cell along the year ordering. Taking [_n-1] within
* state alone mostly returns another kids cell of the same year, not last year.
* As for the unemployment-rate lags in the state-level file, a lag is filled only
* when the earlier row is exactly that many years back.
* Rows before 1993 were dropped above, so the first lags of each cell are missing.
forvalues k = 1/2 {
	bysort statefip kids (year): gen ui_netres_lag`k' = ui_net_reserves[_n-`k'] if year==year[_n-`k']+`k'
}
sort statefip year kids
drop if _merge==2
keep if year>=1990 & year<=2015
drop _merge

*** Label all new variables, build polynomial terms and save state_kids_data.dta
* The UI fund variables were divided by population earlier in this section, so
* their labels now say Population (they used to say GSP).
label var cpi "CPI"

foreach x in mar ip {
	for any 1 2: label var urate_`x'_lagX "X Lag Unemployment Rate"
	for any 1 2: label var urate_`x'_chX "X-Yr $\% \Delta$ Unemployment Rate"
	label var urate_`x' "Unemployment Rate - CPS"
}
label var epop_ip "Employment Rate - CPS"
label var max "St Max UI Benefit (\\$2015)"
label var min "St Min UI Benefit (\\$2015)"
label var sim_repl_sipp "State Simulated Replacement Rate"
label var wba "St Simulated WBA (\\$2015)"
label var steitc_amount "St EITC Max (\\$2015, 1000s)"
label var maxben "St Max AFDC (\\$2015, 1000s)"

for any cps sipp sipp_imp: label var fs_X_pop "FS Spending/Pop"
for any cps sipp sipp_imp: label var tanf_X_pop "AFDC Spending/Pop"
for any cps sipp sipp_imp: label var ssi_X_pop "SSI Spending/Pop"
for any cps sipp sipp_imp: label var ss_X_pop "SS Spending/Pop"
for any cps sipp sipp_imp: label var ui_X_pop "UI Spending/Pop"

label var ui_net_reserves "State UI Net Reserves/Population"
label var ui_netres_lag1 "1 Lag (UI Net Reserves/Population)"
label var ui_netres_lag2 "2 Lag (UI Net Reserves/Population)"

* The raw spending and amount variables are no longer needed once the per-person
* versions exist. NOTE(review): the last two patterns drop every variable whose
* name ends in 4 or 5 - confirm nothing needed later matches them
drop *_spend *_amt* *4 *5
keep if year>=1993 & year<=2015
gen trend = year-1993

* Rescale by 100, then add squares and cubes
for any urate_ip epop_ip urate_ip_lag1 urate_ip_lag2 ui_netres_lag1 ui_netres_lag2: replace X=X*100
for any urate_ip avweekwage epop_ip urate_ip_lag1 urate_ip_lag2 ui_netres_lag1 ui_netres_lag2: gen Xsq=X*X
for any urate_ip avweekwage epop_ip urate_ip_lag1 urate_ip_lag2 ui_netres_lag1 ui_netres_lag2: gen Xcu=X*X*X

label var urate_ip "Unemployment Rate (\%)"
label var urate_ipsq "Unemployment Rate\textsuperscript{2}"
label var urate_ipcu "Unemployment Rate\textsuperscript{3}"

label var epop_ip "Employment/Population"
label var epop_ipsq "Employment/Population\textsuperscript{2}"
label var epop_ipcu "Employment/Population\textsuperscript{3}"

for any 1 2: label var urate_ip_lagX "X Lag Unemployment Rate (\%)"
for any 1 2: label var urate_ip_lagXsq "X Lag Unemployment Rate\textsuperscript{2}"
for any 1 2: label var urate_ip_lagXcu "X Lag Unemployment Rate\textsuperscript{3}"

* NOTE(review): the lagged reserves are per-person amounts multiplied by 100, so
* the percent sign in the first label may not describe the unit - confirm
for any 1 2: label var ui_netres_lagX "X Lag (UI Net Reserves/Population) (\%)"
for any 1 2: label var ui_netres_lagXsq "X Lag (UI Net Reserves/Population)\textsuperscript{2}"
for any 1 2: label var ui_netres_lagXcu "X Lag (UI Net Reserves/Population)\textsuperscript{3}"

* Final label for the replacement rate (replaces the one set above), then rescale by 100
label var sim_repl_sipp "R-rate"
replace sim_repl_sipp=sim_repl_sipp*100

save "${regdata}/state_kids_data.dta", replace


			******************************************************
***************** 		CREATE INDIVIDUAL DATASET		***********************
			******************************************************

***********************
*** Append data and clean
***********************
* Stack the panels from 1996 onwards and keep state codes below 60
use `spell_1996', clear
foreach pnl in 2001 2004 2008 {
	append using `spell_`pnl''
}
keep if statefip<60 & statefip!=.

*** Fix demographics
* Rows with missing kids, education or marital status are removed
foreach v in kids ed mardum {
	drop if `v'==.
}

* One race variable; later assignments override earlier ones
gen race = 1 if white==1
replace race = 2 if black==1
replace race = 3 if hisp==1
replace race = 4 if other==1
drop if race==.

* Sex
* NOTE(review): female is set to 1 where sex equals 1 - confirm against the coding of sex in the extracts
gen female = sex==1
drop if sex==.
drop sex

* Age: ages 18 to 60 are kept, in five-year groups with 55 and over in the last group
keep if inrange(age, 18, 60)
gen agegr = .
replace agegr = 1 if age>=18 & age<25
forvalues g = 2/8 {
	replace agegr = `g' if age>=20+`g'*5-5 & age<20+`g'*5
}
replace agegr = 8 if age>=55

* Marital status
rename mardum married

* Education: four groups, with one dummy per group from tabulate
replace ed = . if ed==-1
gen edgr = 1 if ed<=11
replace edgr = 2 if ed==12
replace edgr = 3 if ed>=13 & ed<=15
replace edgr = 4 if ed>=16
drop if ed==.
tabulate edgr, generate(educ)
gen college = edgr==4
gen lesshs = edgr==1

*************************************
*** Add other state-year variables
*************************************
* State codes; only matched rows are kept
sort statefip
merge m:1 statefip using "${rawdata}/StateYear/statecodes_all.dta", keep(match) nogenerate

*** Merge UI max and mins
* nndate is a yyyymm00 key: January for months 1-6, July for months 7-12
gen nndate = year*10000+1*100 if mth<7
replace nndate = year*10000+7*100 if mth>=7

sort year nndate statefip
merge m:1 nndate statefip kids using "${rawdata}/StateYear/uilaws_updated.dta", keep(match) nogenerate

*** Simulated instruments
merge m:1 kids year statefip using "${regdata}/instrument_sipp_y", keepusing(sim_repl_sipp* wba*) keep(match) nogenerate

*** State controls and variables
merge m:1 year statefip using "${regdata}/state_data", keep(match) nogenerate
keep if year>=1990

* Real versions of the UI maximum, the UI minimum and annual wages
foreach v in max min annwg {
	gen `v'_cpi = `v'*2.37017/cpi
}
label var max_cpi "Max"
label var min_cpi "Min"
rename mth imonth

*** HIIND / MCAID: 0 or -1 NA, 1 Y, 2 N
foreach v in hiind caidcov {
	replace `v' = . if (`v'==2 & panel<1996) | ((`v'==0 | `v'==-1) & panel>=1996)
	replace `v' = 0 if `v'==2 & panel>=1996
}
rename hiind priv_hins
label var priv_hins "Private Health Insurance"
label var caidcov "Medicaid"

*** MCARE / Hospital Stays 0 or -1 NA, 1 Y, 2 N
replace carecov = . if carecov==0 | carecov==-1
replace carecov = 0 if carecov==2
label var carecov "Medicare"

*** Generate any insurance
* Each indicator is missing only when all of its components are missing
gen any_hins = priv_hins==1 | carecov==1 | caidcov==1
replace any_hins = . if priv_hins==. & carecov==. & caidcov==.

gen pub_hins = carecov==1 | caidcov==1
replace pub_hins = . if carecov==. & caidcov==.
label var pub_hins "Medicare/Medicaid"

capture drop _I*

compress
save "${regdata}/sipp_cleaned.dta", replace


			*************************************************
***************** 		CREATE REGRESSION DATASET		***********************
			*************************************************

* Builds one event-study dataset per definition of job separation:
*   sub = 0 -> sipp_reg.dta        (whystop1 equal to 1, 9, 10 or 13: involuntary job loss)
*   sub = 1 -> sipp_reg_plac1.dta  (whystop1 equal to 12: quit for another job)
*   sub = 2 -> sipp_reg_plac2.dta  (any whystop1 other than 1, 9, 10, 13, including missing)
* Every pass starts from the same cleaned SIPP file through preserve/restore.
use "${regdata}/sipp_cleaned.dta", clear
forvalues sub = 0/2 {    	// create subsamples based on reasons for job loss
	
	preserve
	keep if panel>=1996
	egen uniqueid=group(panel suid pnum female)

	*** Create definition of job loss
	for any stlemp1_yn whystop1: replace X=. if X==-1		// set 'not in universe' as missing

	* Months since job loss variable
	* enddate1 is decoded as year*10000 + month*100 + remainder
	* (the unused count of distinct end dates per person is no longer computed)
	gen ym_now = ym(year, imonth)
	replace enddate1=. if enddate1==-1
	format enddate1 %10.0f  
	generate yr_loss = int(enddate1/10000)
	generate mth_loss = int((enddate1 - yr_loss*10000)/100)
	gen ym_loss = ym(yr_loss, mth_loss)
	sort uniqueid order

	* Indicator for whether month of job loss is current month
	gen loss = ym_loss==ym_now & ym_loss!=. & ym_now!=.	
	if `sub' == 0 {
		replace loss=0 if whystop1!=1 & whystop1!=9 & whystop1!=10 & whystop1!=13		// involuntary job loss
		}
	else if `sub' == 1 {
		replace loss=0 if whystop1!=12		// quit for another job
		}
	else {
		replace loss=0 if whystop1==1 | whystop1==9 | whystop1==10 | whystop1==13		// other separations
		}
	//rename jobloss jobloss_old

	*** Months since job loss (first loss month per person is the reference, i.e. the first job loss is used)
	bysort uniqueid: egen num_losses=total(loss)
	gen temp_order_loss = 9999
	replace temp_order_loss=order if loss==1
	bysort uniqueid: egen order_loss=min(temp_order_loss)
	replace order_loss=. if order_loss==9999		// 9999 is the sentinel for "no loss observed"
	gen month_reljl = order-order_loss
	replace month_reljl=. if order_loss==.
	replace month_reljl=-18 if month_reljl<=-18 & month_reljl!=.		// bottom-code event time
	replace month_reljl=30 if month_reljl>=30 & month_reljl!=.			// top-code event time
	gen afterloss = month_reljl>0 & month_reljl!=.
	replace afterloss=. if month_reljl==.
	drop temp_order_loss order_loss

	*** Generate variable for type of insurance in month prior to job loss
	* Person-level flags: 1 if the condition holds in event month -1, else 0
	for any notown own employer: gen temp_X_prior=0
	replace temp_notown_prior=1 if hiown==2 & month_reljl==-1
	replace temp_own_prior=1 if (hiown==1 | hiown==3) & month_reljl==-1
	replace temp_employer_prior=1 if hisrc==1 & (hiown==1 | hiown==3) & month_reljl==-1
	sort uniqueid order
	for any notown own employer: bysort uniqueid: egen X_prior=max(temp_X_prior)
	drop temp_*

	//hours
	replace uhours=. if uhours==-8					// -8 means "hours vary"
	replace uhours=0 if uhours==-1					// -1 means "not in universe"

	// cpi: rescale money amounts by 237.017/cpi
	for any hinc hearn totinc earn uiamt fs_amt tanf_amt ssi_amt ss_amt: replace X=X*237.017/cpi

	// hospital Stays 0 or -1 NA, 1 Y, 2 N
	for any ehospsta: replace X=. if X==0 | X==-1 
	for any ehospsta: replace X=0 if X==2
	label var ehospsta "Hospital Stays"

	// source of insurance
	gen own_hins = priv_hins==1 & (hiown==1 | hiown==3)
	gen employ_hins = priv_hins==1 & hisrc==1 & (hiown==1 | hiown==3)
	gen else_hins = priv_hins==1 & hiown==2
	for any own employ else: replace X_hins=. if priv_hins==.
	label var own_hins "Own Insurance"
	label var employ_hins "Own-Employer Insurance"
	label var else_hins "Someone Else Insurance"

	// other health variables
	* genhlth reverses ehltstat (1..5 becomes 5..1); any other ehltstat value stays missing
	gen genhlth=.
	for any 1 2 3 4 5: replace genhlth=X if ehltstat==6-X 
	gen goodgenhlth = genhlth>=3
	replace goodgenhlth=. if genhlth==.
	gen excgenhlth = genhlth==5			// excellent
	replace excgenhlth=. if genhlth==.
	gen checkup_ly = evisdoc>0 	// 1 if yes checkup
	replace checkup_ly=. if evisdoc==.
	* NOTE(review): excgenhlth is not blanked for refmth!=4 although the other
	* health variables are - confirm whether that omission is intended
	for any checkup_ly genhlth goodgenhlth: replace X=. if refmth!=4 
	label var checkup_ly "Doctor Visit"

	***************************
	*** Controls
	***************************
	drop if age>60
	sort uniqueid order

	*** Weights, state, kid and year of unemployment
	* The value observed in event month 0 is spread to all rows of the person
	foreach x in p5wgt statefip kids year {
		gen temp=`x' if month_reljl==0
		bysort uniqueid: egen `x'_m0=max(temp)
		drop temp
	}
	* Temporarily make the month-0 values the plain names so they serve as merge keys;
	* the monthly values are kept with suffix _m
	for any statefip kids year: rename X X_m
	for any statefip kids year: rename X_m0 X

	* Simulated instruments (re-merged on state, kids and year at the time of job loss)
	drop sim_repl_sipp*
	merge m:1 kids year statefip using "${regdata}/instrument_sipp_y", gen(instm) keepus(sim_repl_sipp*)
	keep if instm==3
	drop instm	
	label var sim_repl_sipp "Sim. R-Rate"
	for any statefip kids year: rename X X_m0
	gen trend=year_m-1995
	for any urate avweekwage: gen Xsq=X*X
	for any urate avweekwage: gen Xcu=X*X*X
	
	* Keep only those with at least one spell
	bysort uniqueid: egen loser=total(loss)
	replace loser=1 if loser>1 & loser!=.
	keep if loser==1

	* Create month in which spell occurred 
	for any max min: bysort uniqueid: egen X_month_reljl=X(month_reljl)
	drop if month_reljl<-12 | month_reljl>=24
	
	* Two-month event-time bins: -12 (months -12,-11), -10, ..., 20, 22 (months 22,23)
	gen month_reljl_temp = month_reljl
	replace month_reljl_temp=-12 if month_reljl_temp<=-11
	replace month_reljl_temp=22 if month_reljl_temp>=22
	forvalues i=-10(2)20 {
		replace month_reljl_temp=`i' if month_reljl_temp==`i'+1
	}
		 
	* The 1/18 range below assumes all 18 bins are present in the data
	tab month_reljl_temp, gen(dumspell)
	for num 1/18: gen sim_spellX=sim_repl_sipp*dumspellX
	drop dumspell6 sim_spell6		// drop month before job loss, so everything becomes relative to that
	cap drop jobloss
	gen jobloss=month_reljl>=0
	for any kids_m statefip_m year_m: egen Xloss=group(X jobloss)

	gen post=month_reljl>=0 & month_reljl!=.
	replace post=. if month_reljl==.
	gen sim_post=0
	replace sim_post=sim_repl_sipp if month_reljl>=0
	label var sim_post "R-rate * Loss"

	
	if `sub'==0 save "${regdata}/sipp_reg.dta", replace
	else save "${regdata}/sipp_reg_plac`sub'.dta", replace
	restore
}



********************************************************************************
*****************************   BRFSS DATASETS   *******************************
********************************************************************************

			*********************************************
***************** 		EXTRACT BRFSS DATASETS		***********************
			*********************************************

* Variables retained from every survey year (after harmonising names in the loop below)
local demographics state ctycode finalwt imonth iyear iday age sex racegr orace hispanic marital ///
	chld04 chld0512 chld1317 children educa employ income
	
local outcomes profexam lengexam bmi exerany smoker alcdays drinkge5 pregnant genhlth physhlth ///
	menthlth hlthplan checkup medcost medreas medcare
	
* Working directory is changed here and not reset; later paths use the absolute globals
cd "${rawdata}/BRFSS"
!unzip BRFSS.zip

*** First keep only variables of interest		
* For each survey year: import the SAS transport file, rename year-specific variables
* to common names (or create them as missing when the code below treats them as absent),
* keep the variables listed above and store the result in a tempfile named after the year
forvalues data=1993(1)2015 {
	display ""	
	display "THIS IS LOOP FOR `data'"
	import sasxport "${rawdata}/BRFSS/BRFSS/CDBRFS`data'.XPT", clear
	
	* Fix year variable
	destring iyear, replace
	replace iyear=1900+iyear if iyear<100		// two-digit years become 19xx
	
	* Fix demographic variable names
	if `data'>=2013 rename _age80 age
	if `data'>=2001 & `data'<=2012 {
		rename _prace orace 						// note that orace has majority values missing. so use prace instead.
		rename hispanc2 hispanic
	}
	if `data'>=2013 rename _hispanc hispanic
	if `data'>=2013 rename _prace orace 
	if `data'>=2011 rename _llcpwt _finalwt	// change in type of weighting too
	
	for any chld04 chld0512 chld1317: if `data'==1993 | `data'>=2001 gen X=.  // age-specific child counts created as missing for 1993 and 2001 onwards
	if `data'>1993 & `data'<2001 gen children = chld04 + chld0512 + chld1317
	
	if `data'==2011 | `data'==2012 rename ctycode1 ctycode		// nothing changed
	if `data'>=2013 gen ctycode=.
	
	if `data'>=1996 rename income2 income
	if `data'==1995 rename income95 income
	
	* Outcome names
	if `data'>=2000 & `data'<=2002 rename _bmi2 _bmi		// check what changed
	if `data'==2003 rename _bmi3 _bmi						// check what changed
	if `data'>=2004 & `data'<=2010 rename _bmi4 _bmi
	if `data'>=2011 rename _bmi5 _bmi
	
	if `data'>=2004 rename weight2 weight		// weight also in kilograms
	
	if `data'==2004 rename height2 height		// height
	if `data'>=2005 rename height3 height		// no more meters/centimeters
	
	for any medcare medreas: if `data'!=2002 gen X=.		// change in question
	if `data'==2002 gen medcost=.
	
	if `data'==2003 | `data'==2004 gen checkup=. 	// created as missing in these two years
	if `data'>=2007 rename checkup1 checkup
	
	if `data'>=2011 rename hlthpln1 hlthplan		// nothing changed
	
	if `data'>=2001 rename exerany2 exerany 
	
	if `data'==1993 rename _smoker smoker
	if `data'>=1994 & `data'<2005 rename _smoker2 smoker
	if `data'>=2005 rename _smoker3 smoker
	
	if `data'==2001 rename pregnt2 pregnant
	
	if `data'==2001 rename bphigh2 bphigh
	if `data'>=2002 & `data'<2005 rename bphigh3 bphigh
	if `data'>=2005 & `data'!=2006 & `data'!=2008 & `data'!=2010 & `data'!=2012 & `data'!=2014 rename bphigh4 bphigh
	if `data'==2006 | `data'==2008 | `data'==2010 | `data'==2012 | `data'==2014 gen bphigh=.
	
	if `data'<2001 rename nalcocc avedrnk
	if `data'>=2005 rename avedrnk2 avedrnk
	
	if `data'<2001 rename alcohol alcdays
	if `data'>=2002 & `data'<2005 rename alcday3 alcdays
	if `data'>=2005 & `data'<2011 rename alcday4 alcdays
	if `data'>=2011 rename alcday5 alcdays
	
	if `data'>=2001 & `data'<2006 rename drnk2ge5 drinkge5
	if `data'>=2006 rename drnk3ge5 drinkge5
	
	if `data'>=1997 & `data'<2001 gen hadrectl=.
	if `data'>=2001  & `data'<2011 rename digrecex hadrectl
	if `data'>=1997 & `data'<2001 gen lastexam=.
	if `data'>=2001 & `data'<2011 rename dretime lastexam
	for any hadrectl lastexam: if `data'>=2011 gen X=.
	
	if `data'==1993 gen dradvice=.
	
	if `data'==1993 gen doctdiab=.
	
	if `data'<1996 gen sexcondm=.
	if (`data'>=2002 & `data'<2004) | `data'>=2005 gen sexcondm=.
	
	for any rsnnocov ratecare distcare: if `data'<1996 | `data'>=2003 gen X=.		
	if `data'==2000 rename hcrate ratecare 
	if `data'==2000 | `data'==2002 gen distcare=.	
	for any ratecare distcare: if `data'==2001 gen X=.
	if `data'>=2001 & `data'<2003 rename rsnocov2 rsnnocov
	if `data'==2002 gen ratecare=.	
	
	for any lastdent reasdent dentlins: if `data'<1995 | `data'>=2005 gen X=.
	for any reasdent dentlins: if `data'==1999 | (`data'>=2002 & `data'<2005) gen X=.
	if `data'>=1999 & `data'<=2004 rename lastden2 lastdent
	
	for any qlmental qlstress qlhlthy qlrest: if `data'<1995 | `data'==1999 | `data'==2003 gen X=.
	if (`data'>=2001 & `data'<2003) | `data'>=2004 rename qlmentl2 qlmental
	if (`data'>=2001 & `data'<2003) | `data'>=2004 rename qlstres2 qlstress
	if (`data'>=2001 & `data'<2003) | `data'>=2004 rename qlhlth2 qlhlthy
	if (`data'>=2001 & `data'<2003) | (`data'>=2004 & `data'<=2012) rename qlrest2 qlrest
	if `data'>=2013 gen qlrest=.
	
	if `data'==2004 | `data'>=2006 gen dradvice=. 
	
	if `data'>=2004 & `data'<2011 rename diabete2 diabetes
	if `data'>=2011 rename diabete3 diabetes
	
	for any frtserv: if `data'==2004 | `data'==2006 | `data'==2008 | `data'>=2010 gen _X=.
	
	if `data'==2004 rename flushot2 flushot
	if `data'>=2005 & `data'<2010 rename flushot3 flushot
	if `data'==2010 rename flushot4 flushot
	if `data'==2011 | `data'==2012 rename flushot5 flushot
	if `data'>=2013 rename flushot6 flushot
	
	if `data'==2006 | `data'==2008 | `data'==2010 | `data'==2012  | `data'==2014 gen bloodcho=.
	
	if (`data'>=2001 & `data'<2006) | `data'==2007 | `data'==2009 | `data'==2011  | `data'==2013 | `data'==2015 ///
		rename toldhi2 toldhi
	if `data'==2006 | `data'==2008 | `data'==2010 | `data'==2012 | `data'==2014 gen toldhi=.
	
	if `data'>=2004 rename hadpap2 hadpap
	if `data'>=2004 rename lastpap2 lastpap
	
	if (`data'>=1999 & `data'<=2001) | (`data'>=2003 & `data'<=2005) | `data'==2007 | `data'==2009 ///
		gen seatbelt=.
		
	if `data'>=2013 rename employ1 employ
	
	* Strip the leading underscore from these variables
	for any state finalwt bmi racegr frtserv: rename _X X
	
	* Only the demographics and outcomes lists survive; other harmonised variables are dropped here
	keep `demographics' `outcomes'
	gen survey=`data'
	
	gen unemployed=employ==4
 
	* One tempfile per survey year, held in a local macro named after the year
	tempfile `data'
	save ``data'', replace
}	

*** Now append together
* Start from the 1993 extract and stack the remaining survey years on top of it
use `1993', clear
forvalues yr = 1994/2015 {
	append using ``yr'', nolabel force
}

drop if iyear<1993 | iyear==2016		// only a few weird obs
drop if state>56						// PR, Guam and VI

rename (state iyear) (statefip year)


			*********************************************
***************** 		CLEAN BRFSS DATASETS		***********************
			*********************************************

*********************
***	MAIN OUTCOMES
*********************
*** Breast exam
* Codes 7 and above become missing, code 2 becomes 0
rename profexam brexam
replace brexam = cond(brexam==2, 0, cond(brexam>=7 & brexam!=., ., brexam))

*** Breast exam last year
* 1 when lengexam equals 1, 0 otherwise; missing when lengexam is 7, 9 or missing
gen bexamly = lengexam==1 if !inlist(lengexam, 7, 9, .)
drop lengexam

*** BMI
* Diagnostic only: mean of the raw BMI codes by year, to check the scaling applied below.
* tabstat is used because the contents() syntax of the old table command is not
* accepted by the redesigned table command in newer Stata releases.
tabstat bmi, by(year) statistics(mean) nototal
* Raw codes are rescaled by a survey-specific power of ten
replace bmi = bmi/10 if survey<=2000
replace bmi = bmi/10000 if survey==2001
replace bmi = bmi/100 if survey>=2002
replace bmi=. if bmi>=99.9 & bmi!=.		// values at or above 99.9 after rescaling are treated as missing
gen low_bmi = bmi<18.5
gen high_bmi = bmi>25
gen norm_bmi = low_bmi==0 & high_bmi==0
foreach v in low_bmi high_bmi norm_bmi {
	replace `v'=. if bmi==.
}

*** Any exercise 	
* Defined only for codes up to 2; larger codes and missing give missing
gen any_exer = exerany==1 if exerany<=2

*** Smoking
gen smoking = smoker<=2 if smoker!=. & smoker!=9
gen daily_smoker = smoker==1 if smoker!=. & smoker!=9

*** Drinking
replace alcdays=0 if alcdays==888		// days drinking per month (intensive)
replace alcdays=. if alcdays>=777
* Codes below 200: subtract 100 and scale by 31/7; codes above 200: subtract 200
replace alcdays=(alcdays-100)*31/7 if alcdays>0 & alcdays<200
replace alcdays=alcdays-200 if alcdays>200 & alcdays!=.

replace drinkge5=0 if drinkge5==88	// times with 5 or more drinks
replace drinkge5=. if drinkge5>=77

*** Pregnant
replace pregnant=. if pregnant>=7 & pregnant!=.
replace pregnant=0 if pregnant==2

*** General Health
replace genhlth=. if inlist(genhlth, 7, 9)

*** Physical Health and Mental Health
* 88 is recoded to zero days; 77 and 99 become missing
foreach v in physhlth menthlth {
	replace `v'=0 if `v'==88
	replace `v'=. if inlist(`v', 77, 99)
}

*** General health plan
gen plan = hlthplan==1 if hlthplan<=2

*** Last routine check-up  // could do more here
gen check_lasty = checkup==1 if checkup<=4

*** Could afford doctor
gen nafford = medcost==1 if medcost<=2
replace nafford=1 if medcare==1 & medreas==1 & survey==2002
replace nafford=0 if medcare==2 & survey==2002

*** Fix some outcomes for easier use: positive means better outcomes
* Fix Could Afford Doctor: swap 0 and 1 (missing stays missing)
replace nafford = 1 - nafford

* Fix General Health: reverse the 1-5 scale; anything outside 1-5 becomes missing
replace genhlth = cond(inlist(genhlth, 1, 2, 3, 4, 5), 6-genhlth, .)
gen goodgenhlth = genhlth>=3 if genhlth!=.
gen excgenhlth = genhlth==5 if genhlth!=.

* Fix Physical Health and Mental Health: convert to 30 minus the reported days
foreach v in physhlth menthlth {
	replace `v' = 30-`v'
	gen good`v' = `v'==30 if `v'!=.
}

*********************
***	MAIN CONTROLS
*********************
* Sample: respondents with employ equal to 1, or flagged unemployed
gen working = employ==1
keep if working==1 | unemployed==1	
gen obsid=_n

*** Sex
gen female=sex==2
gen male=sex==1

*** Age
* Groups: 1 = 18-24, then five-year bands 2 = 25-29 up to 11 = 70-74, and 12 = 75-80
drop if age<18 | age>80
gen agegr = 1 if age>=18 & age<25
forvalues g = 2/11 {
	replace agegr=`g' if age>=20+`g'*5-5 & age<20+`g'*5
}
replace agegr=12 if age>=75 & age<=80

*** Number of children in household
replace children=0 if inrange(children, 77, 88)
foreach v in chld04 chld0512 chld1317 {
	replace `v'=0 if `v'==8
}
replace children = chld04 + chld0512 + chld1317 if inrange(survey, 1994, 2000)

* 99 in the total, or 9 in any age-specific count, is treated as missing
replace children=. if children==99 | chld04==9 | chld0512==9 | chld1317==9
drop if children==.

gen kids = cond(children>4 & children!=., 4, children)

*** Race variables
drop if orace>=7

* hispanics
rename hispanic hisp
drop if hisp>=3
replace hisp=0 if hisp==2

* black, white, other
gen black = orace==2 & hisp==0
gen white = orace==1 & hisp==0
gen other = hisp==0 & black==0 & white==0

* unique race variable (the four indicators are mutually exclusive)
gen race = cond(white==1, 1, cond(black==1, 2, cond(hisp==1, 3, cond(other==1, 4, .))))

*** Marital status
gen married=marital==1
drop if marital==9		// don't knows and missings

*** Education
gen edgr = cond(educa<=3, 1, cond(educa==4, 2, cond(educa==5, 3, cond(educa==6, 4, .))))
drop if inlist(educa, 9, .)
gen college=edgr==4
gen lesshs=edgr==1

*** Income
replace income=77 if income==8 & year==1993
replace income=99 if income==9 & year==1993
replace income=99 if income==.

*** Label main variables of interest	
label var agegr "5-yr Age Group"
label var edgr "Education Group"
label var female "Female"
label var male "Male"
label var married "Married"
label var college "College"
label var lesshs "Less HS"
label var children "N. Children"
label var kids "N. Children, censored"
label var white "White"
label var black "Black"
label var hisp "Hispanic"
label var other "Other Race"
label var finalwt "Weight"
label var unemployed "Unemployed"

label var plan "Health Insurance Coverage"
label var check_lasty "Checkup Last Year" 
label var nafford "Can Afford Doctor"
label var bexamly "Breast Exam Last Year" 
label var genhlth "Self-Reported Health"
label var goodgenhlth "Good Health"
label var excgenhlth "Excellent Health"
label var physhlth "Days with Good Physical Health"
label var menthlth "Days with Good Mental Health"
label var goodphyshlth "Always Good Physical Health"
label var goodmenthlth "Always Good Mental Health"
label var high_bmi "High BMI"
label var norm_bmi "Normal BMI" 
label var low_bmi "Low BMI" 
label var any_exer "Any Exercise" 
label var daily_smoker "Smoking Daily" 
label var alcdays "Days Drinking"
label var drinkge5 "Days Binge Drinking"
	
* Drop raw inputs that have been recoded above, then restrict the year range
drop if racegr==.
drop hlthplan medcost checkup orace marital educa sex bmi ///
	exerany smoker chld* medcare medreas
keep if year>=1993 & year<=2015

compress
save "${regdata}/brfss_cleaned.dta", replace


			****************************************************
***************** 		CREATE REGRESSION DATASETS			***********************
			****************************************************

* Builds brfss_reg.dta: cleaned BRFSS respondents aged 60 or less, merged with
* state codes, state-year controls, UI laws and the simulated instrument,
* plus the interaction terms used in the regressions.
use "${regdata}/brfss_cleaned.dta", clear
keep if age<=60
gen trend=year-1993 		

*** Merge other datasets
* State controls and variables
merge m:1 statefip using "${rawdata}/StateYear/statecodes_all.dta", gen(codem)
drop codem

merge m:1 year statefip using "${regdata}/state_data", gen(statem)
drop if statem==2
drop statem

* Merged once here; its variables stay in memory for the demographic interactions below
merge m:1 state using "${rawdata}/StateYear/med_st_gen.dta"
drop _merge

* UI policy 
* Half-year key: year*10000 + 100 for months 1-6, year*10000 + 700 for months 7-12
destring imonth, replace
gen double nndate=. 
replace nndate = year*10000+1*100 if imonth<=6 & imonth!=.
replace nndate = year*10000+7*100 if imonth>=7 & imonth<=12 & imonth!=.

* Using-only rows added by the merges above have no match here and are dropped
merge m:1 nndate statefip kids using "${rawdata}/StateYear/uilaws_updated.dta", gen(uilawsm)
keep if uilawsm==3
drop uilawsm

* New simulated instruments
merge m:1 kids year statefip using "${regdata}/instrument_sipp_y", gen(instm) keepus(sim_repl_sipp* wba*)
keep if instm==3
drop instm

*** Generate and label new variables of interest
for any max min: gen X_cpi = X*2.37017/cpi			// 2015

label var urate "Unemployment Rate"
label var cpi "CPI"
label var state "State Name"
label var max_cpi "Max Benefit (\\$2015, Hundreds)"
label var min_cpi "Min Benefit (\\$2015, Hundreds)"

* Controls
egen state_unemp = group(statefip unemployed)
egen year_unemp = group(year unemployed)
egen kids_unemp = group(kids unemployed)
egen state_year_kids = group(statefip year kids)

gen sim_unemp=sim_repl_sipp*unemployed
label var sim_unemp "R-rate * Unemployed"
label var sim_repl_sipp "R-rate"

for any urate avweekwage: gen Xsq=X*X
for any urate avweekwage: gen Xcu=X*X*X

* Economic condition
* Split at the weighted median of urate, read from r(p50) of the summarize below
sum urate [aw=finalwt], d
gen urate_50pc = urate<r(p50)
gen urate_00pc = urate>=r(p50)
for any urate_50pc urate_00pc: replace X=. if urate==.
for any 50 00: gen sim_urX = sim_repl_sipp*urate_Xpc
for any 50 00: gen sim_une_urX = sim_repl_sipp*unemployed*urate_Xpc
for any 50 00: gen unemp_urX = unemployed*urate_Xpc

* NOTE(review): the labels hard-code 5.5 while the cutoff is the computed median - confirm they agree
label var urate_50pc "0-5.5 \% UR" 
label var urate_00pc "5.5+ \% UR"
label var sim_ur50 "R-rate * 0-5.5 \% UR"
label var sim_ur00 "R-rate * 5.5+ \% UR"
label var sim_une_ur50 "R-rate * 0-5.5 \% UR * Unemployed"
label var sim_une_ur00 "R-rate * 5.5+ \% UR * Unemployed"
label var unemp_ur50 "Unemployed * 0-5.5 \% UR"
label var unemp_ur00 "Unemployed * 5.5+ \% UR"

* Demographic
* med_st_gen.dta is not merged a second time here: its variables (presumably
* including rank) are already in memory, and a repeated merge could only append
* using-only rows with no survey data after the sample filters above.
for any female married: gen sim_X=sim_repl_sipp*X
for any college childless genmed: gen sim_X=0
replace sim_college=sim_repl_sipp if edgr>3
replace sim_childless=sim_repl_sipp if kids==0
replace sim_genmed=sim_repl_sipp if rank<=26

label var sim_female "R-rate * Female"
label var sim_married "R-rate * Married"
label var sim_college "R-rate * College"
label var sim_childless "R-rate * No Children"
label var sim_genmed "R-rate * Generous Medicaid"

* WBA
* Rescale the benefit bounds from hundreds to thousands and relabel accordingly
for any max min: replace X_cpi=X_cpi/10
label var max_cpi "Max Benefits (\\$2015, 1000s)"
label var min_cpi "Min Benefits (\\$2015, 1000s)"

replace wba=(wba*237.017)/(cpi*1000)		//2015, 1000s
label var wba "WBA (\\$2015, 1000s)"

compress
save "${regdata}/brfss_reg.dta", replace


*******************************************************************************************************************
*******************************************************************************************************************

			*********************************************
*****************		CREATE SUMMARY DATASET		***********************
			*********************************************
			
* SIPP job losers (panels 1996 onwards), prepared for stacking with the BRFSS sample
use "${regdata}/sipp_cleaned.dta", clear
keep if panel>=1996
egen uniqueid=group(panel suid pnum female)

*** Create definition of job loss
foreach v in stlemp1_yn whystop1 enddate1 {
	replace `v'=. if `v'==-1		// set 'not in universe' as missing
}
gen ym_now = ym(year, imonth)
* enddate1 is decoded as year*10000 + month*100 + remainder
generate yr_loss = int(enddate1/10000)
generate mth_loss = int((enddate1 - yr_loss*10000)/100)
gen ym_loss = ym(yr_loss, mth_loss)
sort uniqueid order

* Indicator for whether month of job loss is current month
* (restricted to whystop1 equal to 1, 9, 10 or 13)
gen loss = ym_loss==ym_now & ym_loss!=. & ym_now!=. & inlist(whystop1, 1, 9, 10, 13)

*** Months since involuntary job loss
* 9999 is a sentinel so that the per-person minimum picks the first loss month
gen temp_order_loss = cond(loss==1, order, 9999)
bysort uniqueid: egen order_loss=min(temp_order_loss)
replace order_loss=. if order_loss==9999

* Missing whenever no loss is observed for the person
gen month_reljl = order-order_loss
drop if month_reljl<-12 | month_reljl>=24

* Keep only those with at least one spell
bysort uniqueid: egen loser=total(loss)
replace loser=1 if loser>1 & loser!=.
keep if loser==1

*** Outcomes
foreach v in hinc hearn totinc earn uiamt fs_amt tanf_amt ssi_amt ss_amt {
	replace `v'=`v'*237.017/cpi
}

*** Controls
drop if age>60
xtset uniqueid order
sort uniqueid order

* Weights, state, kid and year of unemployment
* The value observed in event month 0 is spread to every row of the person
foreach x in p5wgt statefip kids year {
	gen temp=`x' if month_reljl==0
	bysort uniqueid: egen `x'_m0=max(temp)
	drop temp
}
* Monthly values move to suffix _m; the month-0 values take over the plain names
foreach v in statefip kids year {
	rename `v' `v'_m
	rename `v'_m0 `v'
}
gen sipp=1

* Rename variables to make them equal
capture drop children
rename kids children
keep if age<=60

*** ADD BRFSS
* Source flag: sipp = 1 rows already in memory, 0 rows appended here, 2 rows appended further below
append using "${regdata}/brfss_cleaned.dta", force
keep if year>=1993
replace sipp=0 if sipp==.
replace finalwt=finalwt/100

* Outcomes
for any alcdays drinkge5: replace X=X/30

*** ADD PANEL SIPP
append using "${regdata}/uireplrates.dta", force
replace sipp=2 if sipp==.
* Rows with a missing panel also pass this filter, because Stata treats missing as larger than any number
keep if panel>=1996

gen real_wage = annwg*237.017/cpi if sipp==2
replace real_wage=. if sipp!=2
replace children=kids if sipp==2

keep if age<=60
replace children=4 if children>4 & children!=.

*** CREATE VARIABLES NEEDED FOR SUM STATS
gen group1=1 if sipp==1
replace group1=2 if sipp==2
replace group1=3 if sipp==0 & unemployed==1
replace group1=4 if sipp==0 & unemployed==0

gen weighting=p5wgt if sipp==1 | sipp==2
	replace weighting=finalwt if sipp==0
gen insurance=any_hins if sipp==1 | sipp==2
	replace insurance=plan if sipp==0

label var female "Female"
label var white "White"
label var black "Black"
label var college "College"
label var married "Married"
label var children "N. Children"
label var age "Age"
label var real_wage "Annual Wage Prior To Job Loss (\\$2015)"
label var uiamt "Monthly UI Benefits (\\$2015)"
label var earn "Monthly Earnings (\\$2015)"
label var urate "Unemployment Rate"

label var insurance "Health Insurance Coverage"
label var check_lasty "Checkup Last Year" 
label var nafford "Can Afford Doctor"
label var bexamly "Breast Exam Last Year" 
label var goodgenhlth "Good Health Status" 
label var goodphyshlth "Physically Healthy" 
label var goodmenthlth "Mentally Healthy"	
label var high_bmi "High BMI" 
label var any_exer "Any Exercise" 
label var daily_smoker "Smoking Daily" 
label var alcdays "\% Days Drinking"
label var drinkge5 "\% Days Binge Drinking"

*Number them first
bysort group1: gen total = _N
label var total "Observations"

* Missing values, and variables outside the sample they belong to, are set to zero
for any female white black college married children age earn uiamt real_wage insurance urate: replace X=0 if X==.
for any real_wage: replace X=0 if sipp!=2
for any earn uiamt priv_hins pub_hins: replace X=0 if sipp!=1
for any check_lasty nafford bexamly goodgenhlth goodphyshlth goodmenthlth high_bmi any_exer ///
	daily_smoker alcdays drinkge5: replace X=0 if sipp!=0

compress
* replace added so that a rerun does not abort on the existing file, matching the other saves
save "${regdata}/sum_data.dta", replace


				*****************************************
******************* CREATE DATA FOR GENERATING FIGURES ***********************
				*****************************************		
					
***************************
*** Read data
***************************
use "${regdata}/sipp_cleaned.dta", clear
keep if panel>=1996
egen uniqueid=group(panel suid pnum female)

* Replace the instrument already in the file with a fresh merge from instrument_sipp_y
drop sim_repl_sipp*
merge m:1 kids year statefip using "${regdata}/instrument_sipp_y", gen(instm) keepus(sim_repl_sipp*)
keep if instm==3
drop instm	

***************************
*** Create sample of interest
***************************
*** Create definition of job loss
* Explore variables to use first
for any stlemp1_yn whystop1: replace X=. if X==-1		// set 'not in universe' as missing

* Months since job loss variable
gen ym_now = ym(year, imonth)
bysort stlemp1_yn: sum enddate1, d		// diagnostic output only
replace enddate1=. if enddate1==-1 

* enddate1 is decoded as year*10000 + month*100 + remainder
* (the unused count of distinct end dates per person is no longer computed;
*  the first job loss is used)
generate yr_loss = int(enddate1/10000)
generate mth_loss = int((enddate1 - yr_loss*10000)/100)
gen ym_loss = ym(yr_loss, mth_loss)
sort uniqueid order
format enddate1 %10.0f 

* Indicator for whether month of job loss is current month
gen loss = ym_loss==ym_now & ym_loss!=. & ym_now!=.
replace loss=0 if whystop1!=1 & whystop1!=9 & whystop1!=10 & whystop1!=13

*** Months since involuntary job loss
bysort uniqueid: egen num_losses=total(loss)
gen temp_order_loss = 9999
replace temp_order_loss=order if loss==1
bysort uniqueid: egen order_loss=min(temp_order_loss)

replace order_loss=. if order_loss==9999		// 9999 is the sentinel for "no loss observed"
gen month_reljl = order-order_loss
replace month_reljl=. if order_loss==.
replace month_reljl=-18 if month_reljl<=-18 & month_reljl!=.
replace month_reljl=30 if month_reljl>=30 & month_reljl!=.
gen afterloss = month_reljl>0 & month_reljl!=.
replace afterloss=. if month_reljl==.

*** DROP those before 12 and after 24 months
* Keep only those with at least one spell
bysort uniqueid: egen loser=total(loss)
replace loser=1 if loser>1 & loser!=.
keep if loser==1

* Create month in which spell occurred 
for any max min: bysort uniqueid: egen X_month_reljl=X(month_reljl)
drop if month_reljl<-12 | month_reljl>=24

***************************
*** Outcomes
***************************
*** Earnings/work outcomes
//hours
replace uhours=. if uhours==-8					// -8 means "hours vary"
replace uhours=0 if uhours==-1					// -1 means "not in universe"

// cpi: rescale money amounts by 237.017/cpi
for any hinc hearn totinc earn uiamt fs_amt tanf_amt ssi_amt ss_amt: replace X=X*237.017/cpi

*** Health outcomes
// hospital Stays 0 or -1 NA, 1 Y, 2 N
for any ehospsta: replace X=. if X==0 | X==-1 
for any ehospsta: replace X=0 if X==2
label var ehospsta "Hospital Stays"

// employer provided health insurance
gen employ_hins = priv_hins==1 & hisrc==1 & (hiown==1 | hiown==3)
replace employ_hins=. if priv_hins==.
label var employ_hins "Own-Employer Insurance"

// some one else's plan
gen else_hins = priv_hins==1 & hiown==2
replace else_hins=. if priv_hins==.
label var else_hins "Someone Else Insurance"

// other health variables
* genhlth reverses ehltstat (1..5 becomes 5..1); any other ehltstat value stays missing
gen genhlth=.
for any 1 2 3 4 5: replace genhlth=X if ehltstat==6-X 
gen goodgenhlth = genhlth>=3
replace goodgenhlth=. if genhlth==.
gen excgenhlth = genhlth==5			// excellent
replace excgenhlth=. if genhlth==.

gen checkup_ly = evisdoc>0 	// 1 if yes checkup
replace checkup_ly=. if evisdoc==.

label var checkup_ly "Doctor Visit"
* NOTE(review): excgenhlth is not blanked for refmth!=4 although the other
* health variables are - confirm whether that omission is intended
for any checkup_ly genhlth goodgenhlth: replace X=. if refmth!=4 

drop if age>60
xtset uniqueid order
sort uniqueid order

* Split each outcome into its values after (suffix 1) and up to (suffix 0) the loss month
* NOTE(review): "num" presumably resolves to num_losses through variable abbreviation - confirm
for any any_hins priv_hins pub_hins earn totinc uiamt num: gen X1=X if afterloss==1
for any any_hins priv_hins pub_hins earn totinc uiamt num: gen X0=X if afterloss==0

* Create differences first 
* Person-level means after and before the loss, and their difference
for any any_hins priv_hins pub_hins earn totinc uiamt: bysort uniqueid: egen unemp_X = mean(X1)
for any any_hins priv_hins pub_hins earn totinc uiamt: bysort uniqueid: egen emp_X = mean(X0)
for any any_hins priv_hins pub_hins earn totinc uiamt: gen drop_X = unemp_X - emp_X

* Average replacement rate
gen temp=sim_repl_sipp if afterloss==1
bysort uniqueid: egen unemp_rrate = mean(temp)
drop temp

save "${regdata}/sipp_fig.dta", replace

